diff options
author | johannkoenig@chromium.org <johannkoenig@chromium.org@4ff67af0-8c30-449e-8e8b-ad334ec8d88c> | 2013-10-31 23:00:00 +0000 |
---|---|---|
committer | johannkoenig@chromium.org <johannkoenig@chromium.org@4ff67af0-8c30-449e-8e8b-ad334ec8d88c> | 2013-10-31 23:00:00 +0000 |
commit | 085cab8fbb48aa8b9f7e3e6d5a2694afd0ffe2e0 (patch) | |
tree | 42980681a06beed06a29aeb8b20002dd798b8571 /source | |
parent | 9af24c977de5b020fffaf66d24a9f5725423aaaa (diff) | |
download | libvpx-085cab8fbb48aa8b9f7e3e6d5a2694afd0ffe2e0.tar.gz |
libvpx: Pull from upstream
Continues the checkin started in:
https://codereview.chromium.org/55493002/
R=tomfinegan@chromium.org
Review URL: https://codereview.chromium.org/54293005
git-svn-id: http://src.chromium.org/svn/trunk/deps/third_party/libvpx@232232 4ff67af0-8c30-449e-8e8b-ad334ec8d88c
Diffstat (limited to 'source')
54 files changed, 1247 insertions, 2337 deletions
diff --git a/source/libvpx/configure b/source/libvpx/configure index 297cec4..621161c 100755 --- a/source/libvpx/configure +++ b/source/libvpx/configure @@ -234,6 +234,8 @@ ARCH_EXT_LIST=" sse3 ssse3 sse4_1 + avx + avx2 altivec " @@ -422,7 +424,7 @@ process_targets() { fi # The write_common_config (config.mk) logic is deferred until after the - # recursive calls to configure complete, becuase we want our universal + # recursive calls to configure complete, because we want our universal # targets to be executed last. write_common_config_targets enabled universal && echo "FAT_ARCHS=${fat_bin_archs}" >> config.mk @@ -608,7 +610,12 @@ process_toolchain() { check_add_cflags -Wuninitialized check_add_cflags -Wunused-variable case ${CC} in - *clang*) ;; + *clang*) + # libvpx and/or clang have issues with aliasing: + # https://code.google.com/p/webm/issues/detail?id=603 + # work around them until they are fixed + check_add_cflags -fno-strict-aliasing + ;; *) check_add_cflags -Wunused-but-set-variable ;; esac enabled extra_warnings || check_add_cflags -Wno-unused-function diff --git a/source/libvpx/examples.mk b/source/libvpx/examples.mk index c17fac9..88327fe 100644 --- a/source/libvpx/examples.mk +++ b/source/libvpx/examples.mk @@ -40,18 +40,18 @@ vpxenc.SRCS += tools_common.c tools_common.h vpxenc.SRCS += vpx_ports/mem_ops.h vpxenc.SRCS += vpx_ports/mem_ops_aligned.h vpxenc.SRCS += vpx_ports/vpx_timer.h -vpxenc.SRCS += libmkv/EbmlIDs.h -vpxenc.SRCS += libmkv/EbmlWriter.c -vpxenc.SRCS += libmkv/EbmlWriter.h +vpxenc.SRCS += third_party/libmkv/EbmlIDs.h +vpxenc.SRCS += third_party/libmkv/EbmlWriter.c +vpxenc.SRCS += third_party/libmkv/EbmlWriter.h vpxenc.SRCS += $(LIBYUV_SRCS) vpxenc.GUID = 548DEC74-7A15-4B2B-AFC3-AA102E7C25C1 vpxenc.DESCRIPTION = Full featured encoder UTILS-$(CONFIG_VP8_ENCODER) += vp8_scalable_patterns.c vp8_scalable_patterns.GUID = 0D6A210B-F482-4D6F-8570-4A9C01ACC88C vp8_scalable_patterns.DESCRIPTION = Temporal Scalability Encoder -UTILS-$(CONFIG_VP8_ENCODER) += vp9_spatial_scalable_encoder.c -vp8_scalable_patterns.GUID = 4A38598D-627D-4505-9C7B-D4020C84100D -vp8_scalable_patterns.DESCRIPTION = Spatial Scalable Encoder +UTILS-$(CONFIG_VP9_ENCODER) += vp9_spatial_scalable_encoder.c +vp9_spatial_scalable_encoder.GUID = 4A38598D-627D-4505-9C7B-D4020C84100D +vp9_spatial_scalable_encoder.DESCRIPTION = Spatial Scalable Encoder # Clean up old ivfenc, ivfdec binaries. ifeq ($(CONFIG_MSVS),yes) diff --git a/source/libvpx/examples/decoder_tmpl.c b/source/libvpx/examples/decoder_tmpl.c index 597fea2..3e55352 100644 --- a/source/libvpx/examples/decoder_tmpl.c +++ b/source/libvpx/examples/decoder_tmpl.c @@ -12,14 +12,14 @@ /* @*INTRODUCTION */ -#include "vpx_config.h" +#include <stdarg.h> #include <stdio.h> #include <stdlib.h> -#include <stdarg.h> #include <string.h> #define VPX_CODEC_DISABLE_COMPAT 1 -#include "vpx/vpx_decoder.h" +#include "./vpx_config.h" #include "vpx/vp8dx.h" +#include "vpx/vpx_decoder.h" #define interface (vpx_codec_vp8_dx()) @EXTRA_INCLUDES diff --git a/source/libvpx/libmkv/EbmlBufferWriter.c b/source/libvpx/libmkv/EbmlBufferWriter.c deleted file mode 100644 index 574e478..0000000 --- a/source/libvpx/libmkv/EbmlBufferWriter.c +++ /dev/null @@ -1,54 +0,0 @@ -// #include <strmif.h> -#include "EbmlBufferWriter.h" -#include "EbmlWriter.h" -// #include <cassert> -// #include <limits> -// #include <malloc.h> //_alloca -#include <stdlib.h> -#include <wchar.h> -#include <string.h> - -void Ebml_Write(EbmlGlobal *glob, const void *buffer_in, unsigned long len) { - unsigned char *src = glob->buf; - src += glob->offset; - memcpy(src, buffer_in, len); - glob->offset += len; -} - -static void _Serialize(EbmlGlobal *glob, const unsigned char *p, const unsigned char *q) { - while (q != p) { - --q; - - unsigned long cbWritten; - memcpy(&(glob->buf[glob->offset]), q, 1); - glob->offset++; - } -} - -void Ebml_Serialize(EbmlGlobal *glob, const void *buffer_in, unsigned long len) { - // assert(buf); - - const unsigned char *const p = (const unsigned char *)(buffer_in); - const unsigned char *const q = p + len; - - _Serialize(glob, p, q); -} - - -void Ebml_StartSubElement(EbmlGlobal *glob, EbmlLoc *ebmlLoc, unsigned long class_id) { - Ebml_WriteID(glob, class_id); - ebmlLoc->offset = glob->offset; - // todo this is always taking 8 bytes, this may need later optimization - unsigned long long unknownLen = 0x01FFFFFFFFFFFFFFLLU; - Ebml_Serialize(glob, (void *)&unknownLen, 8); // this is a key that says lenght unknown -} - -void Ebml_EndSubElement(EbmlGlobal *glob, EbmlLoc *ebmlLoc) { - unsigned long long size = glob->offset - ebmlLoc->offset - 8; - unsigned long long curOffset = glob->offset; - glob->offset = ebmlLoc->offset; - size |= 0x0100000000000000LLU; - Ebml_Serialize(glob, &size, 8); - glob->offset = curOffset; -} - diff --git a/source/libvpx/libmkv/EbmlBufferWriter.h b/source/libvpx/libmkv/EbmlBufferWriter.h deleted file mode 100644 index acd5c2a..0000000 --- a/source/libvpx/libmkv/EbmlBufferWriter.h +++ /dev/null @@ -1,19 +0,0 @@ -#ifndef EBMLBUFFERWRITER_HPP -#define EBMLBUFFERWRITER_HPP - -typedef struct { - unsigned long long offset; -} EbmlLoc; - -typedef struct { - unsigned char *buf; - unsigned int length; - unsigned int offset; -} EbmlGlobal; - - -void Ebml_StartSubElement(EbmlGlobal *glob, EbmlLoc *ebmlLoc, unsigned long class_id); -void Ebml_EndSubElement(EbmlGlobal *glob, EbmlLoc *ebmlLoc); - - -#endif diff --git a/source/libvpx/libmkv/EbmlIDs.h b/source/libvpx/libmkv/EbmlIDs.h deleted file mode 100644 index 44d4385..0000000 --- a/source/libvpx/libmkv/EbmlIDs.h +++ /dev/null @@ -1,231 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ -#ifndef MKV_DEFS_HPP -#define MKV_DEFS_HPP 1 - -/* Commenting out values not available in webm, but available in matroska */ - -enum mkv { - EBML = 0x1A45DFA3, - EBMLVersion = 0x4286, - EBMLReadVersion = 0x42F7, - EBMLMaxIDLength = 0x42F2, - EBMLMaxSizeLength = 0x42F3, - DocType = 0x4282, - DocTypeVersion = 0x4287, - DocTypeReadVersion = 0x4285, -/* CRC_32 = 0xBF, */ - Void = 0xEC, - SignatureSlot = 0x1B538667, - SignatureAlgo = 0x7E8A, - SignatureHash = 0x7E9A, - SignaturePublicKey = 0x7EA5, - Signature = 0x7EB5, - SignatureElements = 0x7E5B, - SignatureElementList = 0x7E7B, - SignedElement = 0x6532, - /* segment */ - Segment = 0x18538067, - /* Meta Seek Information */ - SeekHead = 0x114D9B74, - Seek = 0x4DBB, - SeekID = 0x53AB, - SeekPosition = 0x53AC, - /* Segment Information */ - Info = 0x1549A966, -/* SegmentUID = 0x73A4, */ -/* SegmentFilename = 0x7384, */ -/* PrevUID = 0x3CB923, */ -/* PrevFilename = 0x3C83AB, */ -/* NextUID = 0x3EB923, */ -/* NextFilename = 0x3E83BB, */ -/* SegmentFamily = 0x4444, */ -/* ChapterTranslate = 0x6924, */ -/* ChapterTranslateEditionUID = 0x69FC, */ -/* ChapterTranslateCodec = 0x69BF, */ -/* ChapterTranslateID = 0x69A5, */ - TimecodeScale = 0x2AD7B1, - Segment_Duration = 0x4489, - DateUTC = 0x4461, -/* Title = 0x7BA9, */ - MuxingApp = 0x4D80, - WritingApp = 0x5741, - /* Cluster */ - Cluster = 0x1F43B675, - Timecode = 0xE7, -/* SilentTracks = 0x5854, */ -/* SilentTrackNumber = 0x58D7, */ -/* Position = 0xA7, */ - PrevSize = 0xAB, - BlockGroup = 0xA0, - Block = 0xA1, -/* BlockVirtual = 0xA2, */ - BlockAdditions = 0x75A1, - BlockMore = 0xA6, - BlockAddID = 0xEE, - BlockAdditional = 0xA5, - BlockDuration = 0x9B, -/* ReferencePriority = 0xFA, */ - ReferenceBlock = 0xFB, -/* ReferenceVirtual = 0xFD, */ -/* CodecState = 0xA4, */ -/* Slices = 0x8E, */ -/* TimeSlice = 0xE8, */ - LaceNumber = 0xCC, -/* FrameNumber = 0xCD, */ -/* BlockAdditionID = 0xCB, */ -/* MkvDelay = 0xCE, */ -/* Cluster_Duration = 0xCF, */ - SimpleBlock = 0xA3, -/* EncryptedBlock = 0xAF, */ - /* Track */ - Tracks = 0x1654AE6B, - TrackEntry = 0xAE, - TrackNumber = 0xD7, - TrackUID = 0x73C5, - TrackType = 0x83, - FlagEnabled = 0xB9, - FlagDefault = 0x88, - FlagForced = 0x55AA, - FlagLacing = 0x9C, -/* MinCache = 0x6DE7, */ -/* MaxCache = 0x6DF8, */ - DefaultDuration = 0x23E383, -/* TrackTimecodeScale = 0x23314F, */ -/* TrackOffset = 0x537F, */ - MaxBlockAdditionID = 0x55EE, - Name = 0x536E, - Language = 0x22B59C, - CodecID = 0x86, - CodecPrivate = 0x63A2, - CodecName = 0x258688, -/* AttachmentLink = 0x7446, */ -/* CodecSettings = 0x3A9697, */ -/* CodecInfoURL = 0x3B4040, */ -/* CodecDownloadURL = 0x26B240, */ -/* CodecDecodeAll = 0xAA, */ -/* TrackOverlay = 0x6FAB, */ -/* TrackTranslate = 0x6624, */ -/* TrackTranslateEditionUID = 0x66FC, */ -/* TrackTranslateCodec = 0x66BF, */ -/* TrackTranslateTrackID = 0x66A5, */ - /* video */ - Video = 0xE0, - FlagInterlaced = 0x9A, - StereoMode = 0x53B8, - AlphaMode = 0x53C0, - PixelWidth = 0xB0, - PixelHeight = 0xBA, - PixelCropBottom = 0x54AA, - PixelCropTop = 0x54BB, - PixelCropLeft = 0x54CC, - PixelCropRight = 0x54DD, - DisplayWidth = 0x54B0, - DisplayHeight = 0x54BA, - DisplayUnit = 0x54B2, - AspectRatioType = 0x54B3, -/* ColourSpace = 0x2EB524, */ -/* GammaValue = 0x2FB523, */ - FrameRate = 0x2383E3, - /* end video */ - /* audio */ - Audio = 0xE1, - SamplingFrequency = 0xB5, - OutputSamplingFrequency = 0x78B5, - Channels = 0x9F, -/* ChannelPositions = 0x7D7B, */ - BitDepth = 0x6264, - /* end audio */ - /* content encoding */ -/* ContentEncodings = 0x6d80, */ -/* ContentEncoding = 0x6240, */ -/* ContentEncodingOrder = 0x5031, */ -/* ContentEncodingScope = 0x5032, */ -/* ContentEncodingType = 0x5033, */ -/* ContentCompression = 0x5034, */ -/* ContentCompAlgo = 0x4254, */ -/* ContentCompSettings = 0x4255, */ -/* ContentEncryption = 0x5035, */ -/* ContentEncAlgo = 0x47e1, */ -/* ContentEncKeyID = 0x47e2, */ -/* ContentSignature = 0x47e3, */ -/* ContentSigKeyID = 0x47e4, */ -/* ContentSigAlgo = 0x47e5, */ -/* ContentSigHashAlgo = 0x47e6, */ - /* end content encoding */ - /* Cueing Data */ - Cues = 0x1C53BB6B, - CuePoint = 0xBB, - CueTime = 0xB3, - CueTrackPositions = 0xB7, - CueTrack = 0xF7, - CueClusterPosition = 0xF1, - CueBlockNumber = 0x5378 -/* CueCodecState = 0xEA, */ -/* CueReference = 0xDB, */ -/* CueRefTime = 0x96, */ -/* CueRefCluster = 0x97, */ -/* CueRefNumber = 0x535F, */ -/* CueRefCodecState = 0xEB, */ - /* Attachment */ -/* Attachments = 0x1941A469, */ -/* AttachedFile = 0x61A7, */ -/* FileDescription = 0x467E, */ -/* FileName = 0x466E, */ -/* FileMimeType = 0x4660, */ -/* FileData = 0x465C, */ -/* FileUID = 0x46AE, */ -/* FileReferral = 0x4675, */ - /* Chapters */ -/* Chapters = 0x1043A770, */ -/* EditionEntry = 0x45B9, */ -/* EditionUID = 0x45BC, */ -/* EditionFlagHidden = 0x45BD, */ -/* EditionFlagDefault = 0x45DB, */ -/* EditionFlagOrdered = 0x45DD, */ -/* ChapterAtom = 0xB6, */ -/* ChapterUID = 0x73C4, */ -/* ChapterTimeStart = 0x91, */ -/* ChapterTimeEnd = 0x92, */ -/* ChapterFlagHidden = 0x98, */ -/* ChapterFlagEnabled = 0x4598, */ -/* ChapterSegmentUID = 0x6E67, */ -/* ChapterSegmentEditionUID = 0x6EBC, */ -/* ChapterPhysicalEquiv = 0x63C3, */ -/* ChapterTrack = 0x8F, */ -/* ChapterTrackNumber = 0x89, */ -/* ChapterDisplay = 0x80, */ -/* ChapString = 0x85, */ -/* ChapLanguage = 0x437C, */ -/* ChapCountry = 0x437E, */ -/* ChapProcess = 0x6944, */ -/* ChapProcessCodecID = 0x6955, */ -/* ChapProcessPrivate = 0x450D, */ -/* ChapProcessCommand = 0x6911, */ -/* ChapProcessTime = 0x6922, */ -/* ChapProcessData = 0x6933, */ - /* Tagging */ -/* Tags = 0x1254C367, */ -/* Tag = 0x7373, */ -/* Targets = 0x63C0, */ -/* TargetTypeValue = 0x68CA, */ -/* TargetType = 0x63CA, */ -/* Tagging_TrackUID = 0x63C5, */ -/* Tagging_EditionUID = 0x63C9, */ -/* Tagging_ChapterUID = 0x63C4, */ -/* AttachmentUID = 0x63C6, */ -/* SimpleTag = 0x67C8, */ -/* TagName = 0x45A3, */ -/* TagLanguage = 0x447A, */ -/* TagDefault = 0x4484, */ -/* TagString = 0x4487, */ -/* TagBinary = 0x4485, */ -}; -#endif diff --git a/source/libvpx/libmkv/EbmlWriter.c b/source/libvpx/libmkv/EbmlWriter.c deleted file mode 100644 index 5fc5ed2..0000000 --- a/source/libvpx/libmkv/EbmlWriter.c +++ /dev/null @@ -1,157 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ -#include "EbmlWriter.h" -#include <stdlib.h> -#include <wchar.h> -#include <string.h> -#include <limits.h> -#if defined(_MSC_VER) -#define LITERALU64(n) n -#else -#define LITERALU64(n) n##LLU -#endif - -void Ebml_WriteLen(EbmlGlobal *glob, int64_t val) { - /* TODO check and make sure we are not > than 0x0100000000000000LLU */ - unsigned char size = 8; /* size in bytes to output */ - - /* mask to compare for byte size */ - int64_t minVal = 0xff; - - for (size = 1; size < 8; size ++) { - if (val < minVal) - break; - - minVal = (minVal << 7); - } - - val |= (((uint64_t)0x80) << ((size - 1) * 7)); - - Ebml_Serialize(glob, (void *) &val, sizeof(val), size); -} - -void Ebml_WriteString(EbmlGlobal *glob, const char *str) { - const size_t size_ = strlen(str); - const uint64_t size = size_; - Ebml_WriteLen(glob, size); - /* TODO: it's not clear from the spec whether the nul terminator - * should be serialized too. For now we omit the null terminator. - */ - Ebml_Write(glob, str, (unsigned long)size); -} - -void Ebml_WriteUTF8(EbmlGlobal *glob, const wchar_t *wstr) { - const size_t strlen = wcslen(wstr); - - /* TODO: it's not clear from the spec whether the nul terminator - * should be serialized too. For now we include it. - */ - const uint64_t size = strlen; - - Ebml_WriteLen(glob, size); - Ebml_Write(glob, wstr, (unsigned long)size); -} - -void Ebml_WriteID(EbmlGlobal *glob, unsigned long class_id) { - int len; - - if (class_id >= 0x01000000) - len = 4; - else if (class_id >= 0x00010000) - len = 3; - else if (class_id >= 0x00000100) - len = 2; - else - len = 1; - - Ebml_Serialize(glob, (void *)&class_id, sizeof(class_id), len); -} - -void Ebml_SerializeUnsigned64(EbmlGlobal *glob, unsigned long class_id, uint64_t ui) { - unsigned char sizeSerialized = 8 | 0x80; - Ebml_WriteID(glob, class_id); - Ebml_Serialize(glob, &sizeSerialized, sizeof(sizeSerialized), 1); - Ebml_Serialize(glob, &ui, sizeof(ui), 8); -} - -void Ebml_SerializeUnsigned(EbmlGlobal *glob, unsigned long class_id, unsigned long ui) { - unsigned char size = 8; /* size in bytes to output */ - unsigned char sizeSerialized = 0; - unsigned long minVal; - - Ebml_WriteID(glob, class_id); - minVal = 0x7fLU; /* mask to compare for byte size */ - - for (size = 1; size < 4; size ++) { - if (ui < minVal) { - break; - } - - minVal <<= 7; - } - - sizeSerialized = 0x80 | size; - Ebml_Serialize(glob, &sizeSerialized, sizeof(sizeSerialized), 1); - Ebml_Serialize(glob, &ui, sizeof(ui), size); -} -/* TODO: perhaps this is a poor name for this id serializer helper function */ -void Ebml_SerializeBinary(EbmlGlobal *glob, unsigned long class_id, unsigned long bin) { - int size; - for (size = 4; size > 1; size--) { - if (bin & 0x000000ff << ((size - 1) * 8)) - break; - } - Ebml_WriteID(glob, class_id); - Ebml_WriteLen(glob, size); - Ebml_WriteID(glob, bin); -} - -void Ebml_SerializeFloat(EbmlGlobal *glob, unsigned long class_id, double d) { - unsigned char len = 0x88; - - Ebml_WriteID(glob, class_id); - Ebml_Serialize(glob, &len, sizeof(len), 1); - Ebml_Serialize(glob, &d, sizeof(d), 8); -} - -void Ebml_WriteSigned16(EbmlGlobal *glob, short val) { - signed long out = ((val & 0x003FFFFF) | 0x00200000) << 8; - Ebml_Serialize(glob, &out, sizeof(out), 3); -} - -void Ebml_SerializeString(EbmlGlobal *glob, unsigned long class_id, const char *s) { - Ebml_WriteID(glob, class_id); - Ebml_WriteString(glob, s); -} - -void Ebml_SerializeUTF8(EbmlGlobal *glob, unsigned long class_id, wchar_t *s) { - Ebml_WriteID(glob, class_id); - Ebml_WriteUTF8(glob, s); -} - -void Ebml_SerializeData(EbmlGlobal *glob, unsigned long class_id, unsigned char *data, unsigned long data_length) { - Ebml_WriteID(glob, class_id); - Ebml_WriteLen(glob, data_length); - Ebml_Write(glob, data, data_length); -} - -void Ebml_WriteVoid(EbmlGlobal *glob, unsigned long vSize) { - unsigned char tmp = 0; - unsigned long i = 0; - - Ebml_WriteID(glob, 0xEC); - Ebml_WriteLen(glob, vSize); - - for (i = 0; i < vSize; i++) { - Ebml_Write(glob, &tmp, 1); - } -} - -/* TODO Serialize Date */ diff --git a/source/libvpx/libmkv/EbmlWriter.h b/source/libvpx/libmkv/EbmlWriter.h deleted file mode 100644 index b94f757..0000000 --- a/source/libvpx/libmkv/EbmlWriter.h +++ /dev/null @@ -1,42 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ -#ifndef EBMLWRITER_HPP -#define EBMLWRITER_HPP -#include <stddef.h> -#include "vpx/vpx_integer.h" - -/* note: you must define write and serialize functions as well as your own - * EBML_GLOBAL - * - * These functions MUST be implemented - */ - -typedef struct EbmlGlobal EbmlGlobal; -void Ebml_Serialize(EbmlGlobal *glob, const void *, int, unsigned long); -void Ebml_Write(EbmlGlobal *glob, const void *, unsigned long); - -/*****/ - -void Ebml_WriteLen(EbmlGlobal *glob, int64_t val); -void Ebml_WriteString(EbmlGlobal *glob, const char *str); -void Ebml_WriteUTF8(EbmlGlobal *glob, const wchar_t *wstr); -void Ebml_WriteID(EbmlGlobal *glob, unsigned long class_id); -void Ebml_SerializeUnsigned64(EbmlGlobal *glob, unsigned long class_id, uint64_t ui); -void Ebml_SerializeUnsigned(EbmlGlobal *glob, unsigned long class_id, unsigned long ui); -void Ebml_SerializeBinary(EbmlGlobal *glob, unsigned long class_id, unsigned long ui); -void Ebml_SerializeFloat(EbmlGlobal *glob, unsigned long class_id, double d); -/* TODO make this more generic to signed */ -void Ebml_WriteSigned16(EbmlGlobal *glob, short val); -void Ebml_SerializeString(EbmlGlobal *glob, unsigned long class_id, const char *s); -void Ebml_SerializeUTF8(EbmlGlobal *glob, unsigned long class_id, wchar_t *s); -void Ebml_SerializeData(EbmlGlobal *glob, unsigned long class_id, unsigned char *data, unsigned long data_length); -void Ebml_WriteVoid(EbmlGlobal *glob, unsigned long vSize); -/* TODO need date function */ -#endif diff --git a/source/libvpx/libmkv/Makefile b/source/libvpx/libmkv/Makefile deleted file mode 100644 index b53377b..0000000 --- a/source/libvpx/libmkv/Makefile +++ /dev/null @@ -1,25 +0,0 @@ -#Variables -CC=gcc -LINKER=gcc -FLAGS= - - -#Build Targets -EbmlWriter.o: EbmlWriter.c EbmlWriter.h - $(CC) $(FLAGS) -c EbmlWriter.c - -EbmlBufferWriter.o: EbmlBufferWriter.c EbmlBufferWriter.h - $(CC) $(FLAGS) -c EbmlBufferWriter.c - -MkvElement.o: MkvElement.c WebMElement.h - $(CC) $(FLAGS) -c MkvElement.c - -testlibmkv.o: testlibmkv.c - $(CC) $(FLAGS) -c testlibmkv.c - -testlibmkv: testlibmkv.o MkvElement.o EbmlBufferWriter.o EbmlWriter.o - $(LINKER) $(FLAGS) testlibmkv.o MkvElement.o EbmlBufferWriter.o EbmlWriter.o -o testlibmkv - -clean: - rm -rf *.o testlibmkv -
\ No newline at end of file diff --git a/source/libvpx/libmkv/WebMElement.c b/source/libvpx/libmkv/WebMElement.c deleted file mode 100644 index 2f79a3c..0000000 --- a/source/libvpx/libmkv/WebMElement.c +++ /dev/null @@ -1,214 +0,0 @@ -// Copyright (c) 2010 The WebM project authors. All Rights Reserved. -// -// Use of this source code is governed by a BSD-style license -// that can be found in the LICENSE file in the root of the source -// tree. An additional intellectual property rights grant can be found -// in the file PATENTS. All contributing project authors may -// be found in the AUTHORS file in the root of the source tree. - - -#include "EbmlBufferWriter.h" -#include "EbmlIDs.h" -#include "WebMElement.h" -#include <stdio.h> - -#define kVorbisPrivateMaxSize 4000 - -void writeHeader(EbmlGlobal *glob) { - EbmlLoc start; - Ebml_StartSubElement(glob, &start, EBML); - Ebml_SerializeUnsigned(glob, EBMLVersion, 1); - Ebml_SerializeUnsigned(glob, EBMLReadVersion, 1); // EBML Read Version - Ebml_SerializeUnsigned(glob, EBMLMaxIDLength, 4); // EBML Max ID Length - Ebml_SerializeUnsigned(glob, EBMLMaxSizeLength, 8); // EBML Max Size Length - Ebml_SerializeString(glob, DocType, "webm"); // Doc Type - Ebml_SerializeUnsigned(glob, DocTypeVersion, 2); // Doc Type Version - Ebml_SerializeUnsigned(glob, DocTypeReadVersion, 2); // Doc Type Read Version - Ebml_EndSubElement(glob, &start); -} - -void writeSimpleBlock(EbmlGlobal *glob, unsigned char trackNumber, short timeCode, - int isKeyframe, unsigned char lacingFlag, int discardable, - unsigned char *data, unsigned long dataLength) { - Ebml_WriteID(glob, SimpleBlock); - unsigned long blockLength = 4 + dataLength; - blockLength |= 0x10000000; // TODO check length < 0x0FFFFFFFF - Ebml_Serialize(glob, &blockLength, sizeof(blockLength), 4); - trackNumber |= 0x80; // TODO check track nubmer < 128 - Ebml_Write(glob, &trackNumber, 1); - // Ebml_WriteSigned16(glob, timeCode,2); //this is 3 bytes - Ebml_Serialize(glob, &timeCode, sizeof(timeCode), 2); - unsigned char flags = 0x00 | (isKeyframe ? 0x80 : 0x00) | (lacingFlag << 1) | discardable; - Ebml_Write(glob, &flags, 1); - Ebml_Write(glob, data, dataLength); -} - -static UInt64 generateTrackID(unsigned int trackNumber) { - UInt64 t = time(NULL) * trackNumber; - UInt64 r = rand(); - r = r << 32; - r += rand(); - UInt64 rval = t ^ r; - return rval; -} - -void writeVideoTrack(EbmlGlobal *glob, unsigned int trackNumber, int flagLacing, - char *codecId, unsigned int pixelWidth, unsigned int pixelHeight, - double frameRate) { - EbmlLoc start; - Ebml_StartSubElement(glob, &start, TrackEntry); - Ebml_SerializeUnsigned(glob, TrackNumber, trackNumber); - UInt64 trackID = generateTrackID(trackNumber); - Ebml_SerializeUnsigned(glob, TrackUID, trackID); - Ebml_SerializeString(glob, CodecName, "VP8"); // TODO shouldn't be fixed - - Ebml_SerializeUnsigned(glob, TrackType, 1); // video is always 1 - Ebml_SerializeString(glob, CodecID, codecId); - { - EbmlLoc videoStart; - Ebml_StartSubElement(glob, &videoStart, Video); - Ebml_SerializeUnsigned(glob, PixelWidth, pixelWidth); - Ebml_SerializeUnsigned(glob, PixelHeight, pixelHeight); - Ebml_SerializeFloat(glob, FrameRate, frameRate); - Ebml_EndSubElement(glob, &videoStart); // Video - } - Ebml_EndSubElement(glob, &start); // Track Entry -} -void writeAudioTrack(EbmlGlobal *glob, unsigned int trackNumber, int flagLacing, - char *codecId, double samplingFrequency, unsigned int channels, - unsigned char *private, unsigned long privateSize) { - EbmlLoc start; - Ebml_StartSubElement(glob, &start, TrackEntry); - Ebml_SerializeUnsigned(glob, TrackNumber, trackNumber); - UInt64 trackID = generateTrackID(trackNumber); - Ebml_SerializeUnsigned(glob, TrackUID, trackID); - Ebml_SerializeUnsigned(glob, TrackType, 2); // audio is always 2 - // I am using defaults for thesed required fields - /* Ebml_SerializeUnsigned(glob, FlagEnabled, 1); - Ebml_SerializeUnsigned(glob, FlagDefault, 1); - Ebml_SerializeUnsigned(glob, FlagForced, 1); - Ebml_SerializeUnsigned(glob, FlagLacing, flagLacing);*/ - Ebml_SerializeString(glob, CodecID, codecId); - Ebml_SerializeData(glob, CodecPrivate, private, privateSize); - - Ebml_SerializeString(glob, CodecName, "VORBIS"); // fixed for now - { - EbmlLoc AudioStart; - Ebml_StartSubElement(glob, &AudioStart, Audio); - Ebml_SerializeFloat(glob, SamplingFrequency, samplingFrequency); - Ebml_SerializeUnsigned(glob, Channels, channels); - Ebml_EndSubElement(glob, &AudioStart); - } - Ebml_EndSubElement(glob, &start); -} -void writeSegmentInformation(EbmlGlobal *ebml, EbmlLoc *startInfo, unsigned long timeCodeScale, double duration) { - Ebml_StartSubElement(ebml, startInfo, Info); - Ebml_SerializeUnsigned(ebml, TimecodeScale, timeCodeScale); - Ebml_SerializeFloat(ebml, Segment_Duration, duration * 1000.0); // Currently fixed to using milliseconds - Ebml_SerializeString(ebml, 0x4D80, "QTmuxingAppLibWebM-0.0.1"); - Ebml_SerializeString(ebml, 0x5741, "QTwritingAppLibWebM-0.0.1"); - Ebml_EndSubElement(ebml, startInfo); -} - -/* -void Mkv_InitializeSegment(Ebml& ebml_out, EbmlLoc& ebmlLoc) -{ - Ebml_StartSubElement(ebml_out, ebmlLoc, 0x18538067); -} - -void Mkv_InitializeSeek(Ebml& ebml_out, EbmlLoc& ebmlLoc) -{ - Ebml_StartSubElement(ebml_out, ebmlLoc, 0x114d9b74); -} -void Mkv_WriteSeekInformation(Ebml& ebml_out, SeekStruct& seekInformation) -{ - EbmlLoc ebmlLoc; - Ebml_StartSubElement(ebml_out, ebmlLoc, 0x4dbb); - Ebml_SerializeString(ebml_out, 0x53ab, seekInformation.SeekID); - Ebml_SerializeUnsigned(ebml_out, 0x53ac, seekInformation.SeekPosition); - Ebml_EndSubElement(ebml_out, ebmlLoc); -} - -void Mkv_WriteSegmentInformation(Ebml& ebml_out, SegmentInformationStruct& segmentInformation) -{ - Ebml_SerializeUnsigned(ebml_out, 0x73a4, segmentInformation.segmentUID); - if (segmentInformation.filename != 0) - Ebml_SerializeString(ebml_out, 0x7384, segmentInformation.filename); - Ebml_SerializeUnsigned(ebml_out, 0x2AD7B1, segmentInformation.TimecodeScale); - Ebml_SerializeUnsigned(ebml_out, 0x4489, segmentInformation.Duration); - // TODO date - Ebml_SerializeWString(ebml_out, 0x4D80, L"MKVMUX"); - Ebml_SerializeWString(ebml_out, 0x5741, segmentInformation.WritingApp); -} - -void Mkv_InitializeTrack(Ebml& ebml_out, EbmlLoc& ebmlLoc) -{ - Ebml_StartSubElement(ebml_out, ebmlLoc, 0x1654AE6B); -} - -static void Mkv_WriteGenericTrackData(Ebml& ebml_out, TrackStruct& track) -{ - Ebml_SerializeUnsigned(ebml_out, 0xD7, track.TrackNumber); - Ebml_SerializeUnsigned(ebml_out, 0x73C5, track.TrackUID); - Ebml_SerializeUnsigned(ebml_out, 0x83, track.TrackType); - Ebml_SerializeUnsigned(ebml_out, 0xB9, track.FlagEnabled ? 1 :0); - Ebml_SerializeUnsigned(ebml_out, 0x88, track.FlagDefault ? 1 :0); - Ebml_SerializeUnsigned(ebml_out, 0x55AA, track.FlagForced ? 1 :0); - if (track.Language != 0) - Ebml_SerializeString(ebml_out, 0x22B59C, track.Language); - if (track.CodecID != 0) - Ebml_SerializeString(ebml_out, 0x86, track.CodecID); - if (track.CodecPrivate != 0) - Ebml_SerializeData(ebml_out, 0x63A2, track.CodecPrivate, track.CodecPrivateLength); - if (track.CodecName != 0) - Ebml_SerializeWString(ebml_out, 0x258688, track.CodecName); -} - -void Mkv_WriteVideoTrack(Ebml& ebml_out, TrackStruct & track, VideoTrackStruct& video) -{ - EbmlLoc trackHeadLoc, videoHeadLoc; - Ebml_StartSubElement(ebml_out, trackHeadLoc, 0xAE); // start Track - Mkv_WriteGenericTrackData(ebml_out, track); - Ebml_StartSubElement(ebml_out, videoHeadLoc, 0xE0); // start Video - Ebml_SerializeUnsigned(ebml_out, 0x9A, video.FlagInterlaced ? 1 :0); - Ebml_SerializeUnsigned(ebml_out, 0xB0, video.PixelWidth); - Ebml_SerializeUnsigned(ebml_out, 0xBA, video.PixelHeight); - Ebml_SerializeUnsigned(ebml_out, 0x54B0, video.PixelDisplayWidth); - Ebml_SerializeUnsigned(ebml_out, 0x54BA, video.PixelDisplayHeight); - Ebml_SerializeUnsigned(ebml_out, 0x54B2, video.displayUnit); - Ebml_SerializeFloat(ebml_out, 0x2383E3, video.FrameRate); - Ebml_EndSubElement(ebml_out, videoHeadLoc); - Ebml_EndSubElement(ebml_out, trackHeadLoc); - -} - -void Mkv_WriteAudioTrack(Ebml& ebml_out, TrackStruct & track, AudioTrackStruct& video) -{ - EbmlLoc trackHeadLoc, audioHeadLoc; - Ebml_StartSubElement(ebml_out, trackHeadLoc, 0xAE); - Mkv_WriteGenericTrackData(ebml_out, track); - Ebml_StartSubElement(ebml_out, audioHeadLoc, 0xE0); // start Audio - Ebml_SerializeFloat(ebml_out, 0xB5, video.SamplingFrequency); - Ebml_SerializeUnsigned(ebml_out, 0x9F, video.Channels); - Ebml_SerializeUnsigned(ebml_out, 0x6264, video.BitDepth); - Ebml_EndSubElement(ebml_out, audioHeadLoc); // end audio - Ebml_EndSubElement(ebml_out, trackHeadLoc); -} - -void Mkv_WriteEbmlClusterHead(Ebml& ebml_out, EbmlLoc& ebmlLoc, ClusterHeadStruct & clusterHead) -{ - Ebml_StartSubElement(ebml_out, ebmlLoc, 0x1F43B675); - Ebml_SerializeUnsigned(ebml_out, 0x6264, clusterHead.TimeCode); -} - -void Mkv_WriteSimpleBlockHead(Ebml& ebml_out, EbmlLoc& ebmlLoc, SimpleBlockStruct& block) -{ - Ebml_StartSubElement(ebml_out, ebmlLoc, 0xA3); - Ebml_Write1UInt(ebml_out, block.TrackNumber); - Ebml_WriteSigned16(ebml_out,block.TimeCode); - unsigned char flags = 0x00 | (block.iskey ? 0x80:0x00) | (block.lacing << 1) | block.discardable; - Ebml_Write1UInt(ebml_out, flags); // TODO this may be the wrong function - Ebml_Serialize(ebml_out, block.data, block.dataLength); - Ebml_EndSubElement(ebml_out,ebmlLoc); -} -*/ diff --git a/source/libvpx/libmkv/WebMElement.h b/source/libvpx/libmkv/WebMElement.h deleted file mode 100644 index d9ad0a0..0000000 --- a/source/libvpx/libmkv/WebMElement.h +++ /dev/null @@ -1,35 +0,0 @@ -// Copyright (c) 2010 The WebM project authors. All Rights Reserved. -// -// Use of this source code is governed by a BSD-style license -// that can be found in the LICENSE file in the root of the source -// tree. An additional intellectual property rights grant can be found -// in the file PATENTS. All contributing project authors may -// be found in the AUTHORS file in the root of the source tree. - - -#ifndef MKV_CONTEXT_HPP -#define MKV_CONTEXT_HPP 1 - -void writeSimpleBock(EbmlGlobal *ebml, unsigned char trackNumber, unsigned short timeCode, - int isKeyframe, unsigned char lacingFlag, int discardable, - unsigned char *data, unsigned long dataLength); - - -// these are helper functions -void writeHeader(EbmlGlobal *ebml); -void writeSegmentInformation(EbmlGlobal *ebml, EbmlLoc *startInfo, unsigned long timeCodeScale, double duration); -// this function is a helper only, it assumes a lot of defaults -void writeVideoTrack(EbmlGlobal *ebml, unsigned int trackNumber, int flagLacing, - char *codecId, unsigned int pixelWidth, unsigned int pixelHeight, - double frameRate); -void writeAudioTrack(EbmlGlobal *glob, unsigned int trackNumber, int flagLacing, - char *codecId, double samplingFrequency, unsigned int channels, - unsigned char *private, unsigned long privateSize); - -void writeSimpleBlock(EbmlGlobal *ebml, unsigned char trackNumber, short timeCode, - int isKeyframe, unsigned char lacingFlag, int discardable, - unsigned char *data, unsigned long dataLength); - - - -#endif
\ No newline at end of file diff --git a/source/libvpx/libmkv/testlibmkv.c b/source/libvpx/libmkv/testlibmkv.c deleted file mode 100644 index 97bcf95..0000000 --- a/source/libvpx/libmkv/testlibmkv.c +++ /dev/null @@ -1,62 +0,0 @@ -// Copyright (c) 2010 The WebM project authors. All Rights Reserved. -// -// Use of this source code is governed by a BSD-style license -// that can be found in the LICENSE file in the root of the source -// tree. An additional intellectual property rights grant can be found -// in the file PATENTS. All contributing project authors may -// be found in the AUTHORS file in the root of the source tree. - - - -#include "EbmlIDs.h" -#include "EbmlBufferWriter.h" -#include "WebMElement.h" - -#include <stdio.h> -int main(int argc, char *argv[]) { - // init the datatype we're using for ebml output - unsigned char data[8192]; - EbmlGlobal ebml; - ebml.buf = data; - ebml.offset = 0; - ebml.length = 8192; - - writeHeader(&ebml); - { - EbmlLoc startSegment; - Ebml_StartSubElement(&ebml, &startSegment, Segment); // segment - { - // segment info - EbmlLoc startInfo; - Ebml_StartSubElement(&ebml, &startInfo, Info); - Ebml_SerializeString(&ebml, 0x4D80, "muxingAppLibMkv"); - Ebml_SerializeString(&ebml, 0x5741, "writingAppLibMkv"); - Ebml_EndSubElement(&ebml, &startInfo); - } - - { - EbmlLoc trackStart; - Ebml_StartSubElement(&ebml, &trackStart, Tracks); - writeVideoTrack(&ebml, 1, 1, "V_MS/VFW/FOURCC", 320, 240, 29.97); - // writeAudioTrack(&ebml,2,1, "A_VORBIS", 32000, 1, NULL, 0); - Ebml_EndSubElement(&ebml, &trackStart); - } - - { - EbmlLoc clusterStart; - Ebml_StartSubElement(&ebml, &clusterStart, Cluster); // cluster - Ebml_SerializeUnsigned(&ebml, Timecode, 0); - - unsigned char someData[4] = {1, 2, 3, 4}; - writeSimpleBlock(&ebml, 1, 0, 1, 0, 0, someData, 4); - Ebml_EndSubElement(&ebml, &clusterStart); - } // end cluster - Ebml_EndSubElement(&ebml, &startSegment); - } - - // dump ebml stuff to the file - FILE *file_out = fopen("test.mkv", "wb"); - size_t bytesWritten = fwrite(data, 1, ebml.offset, file_out); - fclose(file_out); - return 0; -}
\ No newline at end of file diff --git a/source/libvpx/libs.mk b/source/libvpx/libs.mk index 43545e3..4691a12 100644 --- a/source/libvpx/libs.mk +++ b/source/libvpx/libs.mk @@ -183,8 +183,6 @@ CODEC_EXPORTS-$(CONFIG_DECODERS) += vpx/exports_dec INSTALL-LIBS-yes += include/vpx/vpx_codec.h INSTALL-LIBS-yes += include/vpx/vpx_image.h INSTALL-LIBS-yes += include/vpx/vpx_integer.h -INSTALL-LIBS-yes += include/vpx/vpx_codec_impl_top.h -INSTALL-LIBS-yes += include/vpx/vpx_codec_impl_bottom.h INSTALL-LIBS-$(CONFIG_DECODERS) += include/vpx/vpx_decoder.h INSTALL-LIBS-$(CONFIG_ENCODERS) += include/vpx/vpx_encoder.h ifeq ($(CONFIG_EXTERNAL_BUILD),yes) diff --git a/source/libvpx/tools/lint-hunks.py b/source/libvpx/tools/lint-hunks.py index b15a691..6e25d93 100644 --- a/source/libvpx/tools/lint-hunks.py +++ b/source/libvpx/tools/lint-hunks.py @@ -24,7 +24,7 @@ TOPLEVEL_CMD = ["git", "rev-parse", "--show-toplevel"] DIFF_CMD = ["git", "diff"] DIFF_INDEX_CMD = ["git", "diff-index", "-u", "HEAD", "--"] SHOW_CMD = ["git", "show"] -CPPLINT_FILTERS = ["-readability/casting", "-runtime/int"] +CPPLINT_FILTERS = ["-readability/casting"] class Usage(Exception): diff --git a/source/libvpx/vp8/common/filter.c b/source/libvpx/vp8/common/filter.c index 1901ea3..25266f8 100644 --- a/source/libvpx/vp8/common/filter.c +++ b/source/libvpx/vp8/common/filter.c @@ -9,9 +9,7 @@ */ -#include <stdlib.h> #include "filter.h" -#include "vpx_ports/mem.h" DECLARE_ALIGNED(16, const short, vp8_bilinear_filters[8][2]) = { diff --git a/source/libvpx/vp8/common/filter.h b/source/libvpx/vp8/common/filter.h index b7591f2..ccda7c8 100644 --- a/source/libvpx/vp8/common/filter.h +++ b/source/libvpx/vp8/common/filter.h @@ -12,11 +12,13 @@ #ifndef FILTER_H #define FILTER_H +#include "vpx_ports/mem.h" + #define BLOCK_HEIGHT_WIDTH 4 #define VP8_FILTER_WEIGHT 128 #define VP8_FILTER_SHIFT 7 -extern const short vp8_bilinear_filters[8][2]; -extern const short vp8_sub_pel_filters[8][6]; +extern DECLARE_ALIGNED(16, const short, vp8_bilinear_filters[8][2]); +extern DECLARE_ALIGNED(16, const short, vp8_sub_pel_filters[8][6]); #endif diff --git a/source/libvpx/vp8/common/findnearmv.h b/source/libvpx/vp8/common/findnearmv.h index 06ef060..c60e463 100644 --- a/source/libvpx/vp8/common/findnearmv.h +++ b/source/libvpx/vp8/common/findnearmv.h @@ -124,7 +124,7 @@ static int above_block_mv(const MODE_INFO *cur_mb, int b, int mi_stride) b += 16; } - return (cur_mb->bmi + b - 4)->mv.as_int; + return (cur_mb->bmi + (b - 4))->mv.as_int; } static B_PREDICTION_MODE left_block_mode(const MODE_INFO *cur_mb, int b) { diff --git a/source/libvpx/vp8/common/idct_blk.c b/source/libvpx/vp8/common/idct_blk.c index 8edfffb..65d5002 100644 --- a/source/libvpx/vp8/common/idct_blk.c +++ b/source/libvpx/vp8/common/idct_blk.c @@ -10,6 +10,7 @@ #include "vpx_config.h" #include "vp8_rtcd.h" +#include "vpx_mem/vpx_mem.h" void vp8_dequant_idct_add_c(short *input, short *dq, unsigned char *dest, int stride); @@ -32,7 +33,7 @@ void vp8_dequant_idct_add_y_block_c else { vp8_dc_only_idct_add_c (q[0]*dq[0], dst, stride, dst, stride); - ((int *)q)[0] = 0; + vpx_memset(q, 0, 2 * sizeof(q[0])); } q += 16; @@ -58,7 +59,7 @@ void vp8_dequant_idct_add_uv_block_c else { vp8_dc_only_idct_add_c (q[0]*dq[0], dstu, stride, dstu, stride); - ((int *)q)[0] = 0; + vpx_memset(q, 0, 2 * sizeof(q[0])); } q += 16; @@ -77,7 +78,7 @@ void vp8_dequant_idct_add_uv_block_c else { vp8_dc_only_idct_add_c (q[0]*dq[0], dstv, stride, dstv, stride); - ((int *)q)[0] = 0; + vpx_memset(q, 0, 2 * sizeof(q[0])); } q += 16; diff --git a/source/libvpx/vp8/common/reconinter.c b/source/libvpx/vp8/common/reconinter.c index 43f84d0..bac3c94 100644 --- a/source/libvpx/vp8/common/reconinter.c +++ b/source/libvpx/vp8/common/reconinter.c @@ -138,14 +138,10 @@ void vp8_build_inter_predictors_b(BLOCKD *d, int pitch, unsigned char *base_pre, { for (r = 0; r < 4; r++) { -#if !(CONFIG_FAST_UNALIGNED) pred_ptr[0] = ptr[0]; pred_ptr[1] = ptr[1]; pred_ptr[2] = ptr[2]; pred_ptr[3] = ptr[3]; -#else - *(uint32_t *)pred_ptr = *(uint32_t *)ptr ; -#endif pred_ptr += pitch; ptr += pre_stride; } @@ -196,16 +192,12 @@ static void build_inter_predictors_b(BLOCKD *d, unsigned char *dst, int dst_stri { for (r = 0; r < 4; r++) { -#if !(CONFIG_FAST_UNALIGNED) dst[0] = ptr[0]; dst[1] = ptr[1]; dst[2] = ptr[2]; dst[3] = ptr[3]; -#else - *(uint32_t *)dst = *(uint32_t *)ptr ; -#endif - dst += dst_stride; - ptr += pre_stride; + dst += dst_stride; + ptr += pre_stride; } } } @@ -270,7 +262,7 @@ void vp8_build_inter4x4_predictors_mbuv(MACROBLOCKD *x) + x->block[yoffset+4].bmi.mv.as_mv.row + x->block[yoffset+5].bmi.mv.as_mv.row; - temp += 4 + ((temp >> (sizeof(int) * CHAR_BIT - 1)) << 3); + temp += 4 + ((temp >> (sizeof(temp) * CHAR_BIT - 1)) * 8); x->block[uoffset].bmi.mv.as_mv.row = (temp / 8) & x->fullpixel_mask; @@ -279,7 +271,7 @@ void vp8_build_inter4x4_predictors_mbuv(MACROBLOCKD *x) + x->block[yoffset+4].bmi.mv.as_mv.col + x->block[yoffset+5].bmi.mv.as_mv.col; - temp += 4 + ((temp >> (sizeof(int) * CHAR_BIT - 1)) << 3); + temp += 4 + ((temp >> (sizeof(temp) * CHAR_BIT - 1)) * 8); x->block[uoffset].bmi.mv.as_mv.col = (temp / 8) & x->fullpixel_mask; @@ -558,7 +550,7 @@ void build_4x4uvmvs(MACROBLOCKD *x) + x->mode_info_context->bmi[yoffset + 4].mv.as_mv.row + x->mode_info_context->bmi[yoffset + 5].mv.as_mv.row; - temp += 4 + ((temp >> (sizeof(int) * CHAR_BIT - 1)) << 3); + temp += 4 + ((temp >> (sizeof(temp) * CHAR_BIT - 1)) * 8); x->block[uoffset].bmi.mv.as_mv.row = (temp / 8) & x->fullpixel_mask; @@ -567,7 +559,7 @@ void build_4x4uvmvs(MACROBLOCKD *x) + x->mode_info_context->bmi[yoffset + 4].mv.as_mv.col + x->mode_info_context->bmi[yoffset + 5].mv.as_mv.col; - temp += 4 + ((temp >> (sizeof(int) * CHAR_BIT - 1)) << 3); + temp += 4 + ((temp >> (sizeof(temp) * CHAR_BIT - 1)) * 8); x->block[uoffset].bmi.mv.as_mv.col = (temp / 8) & x->fullpixel_mask; diff --git a/source/libvpx/vp8/common/x86/filter_x86.c b/source/libvpx/vp8/common/x86/filter_x86.c index ebab814..7f496ed 100644 --- a/source/libvpx/vp8/common/x86/filter_x86.c +++ b/source/libvpx/vp8/common/x86/filter_x86.c @@ -8,7 +8,7 @@ * be found in the AUTHORS file in the root of the source tree. */ -#include "vpx_ports/mem.h" +#include "vp8/common/x86/filter_x86.h" DECLARE_ALIGNED(16, const short, vp8_bilinear_filters_x86_4[8][8]) = { diff --git a/source/libvpx/vp8/common/x86/filter_x86.h b/source/libvpx/vp8/common/x86/filter_x86.h index efcc4dc..cfadaee 100644 --- a/source/libvpx/vp8/common/x86/filter_x86.h +++ b/source/libvpx/vp8/common/x86/filter_x86.h @@ -11,9 +11,15 @@ #ifndef FILTER_X86_H #define FILTER_X86_H +#include "vpx_ports/mem.h" + /* x86 assembly specific copy of vp8/common/filter.c:vp8_bilinear_filters with * duplicated values */ -extern const short vp8_bilinear_filters_x86_4[8][8]; /* duplicated 4x */ -extern const short vp8_bilinear_filters_x86_8[8][16]; /* duplicated 8x */ + +/* duplicated 4x */ +extern DECLARE_ALIGNED(16, const short, vp8_bilinear_filters_x86_4[8][8]); + +/* duplicated 8x */ +extern DECLARE_ALIGNED(16, const short, vp8_bilinear_filters_x86_8[8][16]); #endif /* FILTER_X86_H */ diff --git a/source/libvpx/vp8/common/x86/idct_blk_mmx.c b/source/libvpx/vp8/common/x86/idct_blk_mmx.c index 49b2013..a1e4ce6 100644 --- a/source/libvpx/vp8/common/x86/idct_blk_mmx.c +++ b/source/libvpx/vp8/common/x86/idct_blk_mmx.c @@ -11,6 +11,7 @@ #include "vpx_config.h" #include "vp8_rtcd.h" #include "vp8/common/blockd.h" +#include "vpx_mem/vpx_mem.h" extern void vp8_dequantize_b_impl_mmx(short *sq, short *dq, short *q); @@ -35,7 +36,7 @@ void vp8_dequant_idct_add_y_block_mmx else if (eobs[0] == 1) { vp8_dc_only_idct_add_mmx (q[0]*dq[0], dst, stride, dst, stride); - ((int *)q)[0] = 0; + vpx_memset(q, 0, 2 * sizeof(q[0])); } if (eobs[1] > 1) @@ -44,7 +45,7 @@ void vp8_dequant_idct_add_y_block_mmx { vp8_dc_only_idct_add_mmx (q[16]*dq[0], dst+4, stride, dst+4, stride); - ((int *)(q+16))[0] = 0; + vpx_memset(q + 16, 0, 2 * sizeof(q[0])); } if (eobs[2] > 1) @@ -53,7 +54,7 @@ void vp8_dequant_idct_add_y_block_mmx { vp8_dc_only_idct_add_mmx (q[32]*dq[0], dst+8, stride, dst+8, stride); - ((int *)(q+32))[0] = 0; + vpx_memset(q + 32, 0, 2 * sizeof(q[0])); } if (eobs[3] > 1) @@ -62,7 +63,7 @@ void vp8_dequant_idct_add_y_block_mmx { vp8_dc_only_idct_add_mmx (q[48]*dq[0], dst+12, stride, dst+12, stride); - ((int *)(q+48))[0] = 0; + vpx_memset(q + 48, 0, 2 * sizeof(q[0])); } q += 64; @@ -84,7 +85,7 @@ void vp8_dequant_idct_add_uv_block_mmx else if (eobs[0] == 1) { vp8_dc_only_idct_add_mmx (q[0]*dq[0], dstu, stride, dstu, stride); - ((int *)q)[0] = 0; + vpx_memset(q, 0, 2 * sizeof(q[0])); } if (eobs[1] > 1) @@ -93,7 +94,7 @@ void vp8_dequant_idct_add_uv_block_mmx { vp8_dc_only_idct_add_mmx (q[16]*dq[0], dstu+4, stride, dstu+4, stride); - ((int *)(q+16))[0] = 0; + vpx_memset(q + 16, 0, 2 * sizeof(q[0])); } q += 32; @@ -108,7 +109,7 @@ void vp8_dequant_idct_add_uv_block_mmx else if (eobs[0] == 1) { vp8_dc_only_idct_add_mmx (q[0]*dq[0], dstv, stride, dstv, stride); - ((int *)q)[0] = 0; + vpx_memset(q, 0, 2 * sizeof(q[0])); } if (eobs[1] > 1) @@ -117,7 +118,7 @@ void vp8_dequant_idct_add_uv_block_mmx { vp8_dc_only_idct_add_mmx (q[16]*dq[0], dstv+4, stride, dstv+4, stride); - ((int *)(q+16))[0] = 0; + vpx_memset(q + 16, 0, 2 * sizeof(q[0])); } q += 32; diff --git a/source/libvpx/vp8/common/x86/vp8_asm_stubs.c b/source/libvpx/vp8/common/x86/vp8_asm_stubs.c index c0416b7..b409293 100644 --- a/source/libvpx/vp8/common/x86/vp8_asm_stubs.c +++ b/source/libvpx/vp8/common/x86/vp8_asm_stubs.c @@ -611,16 +611,12 @@ void vp8_sixtap_predict4x4_ssse3 for (r = 0; r < 4; r++) { - #if !(CONFIG_FAST_UNALIGNED) dst_ptr[0] = src_ptr[0]; dst_ptr[1] = src_ptr[1]; dst_ptr[2] = src_ptr[2]; dst_ptr[3] = src_ptr[3]; - #else - *(uint32_t *)dst_ptr = *(uint32_t *)src_ptr ; - #endif - dst_ptr += dst_pitch; - src_ptr += src_pixels_per_line; + dst_ptr += dst_pitch; + src_ptr += src_pixels_per_line; } } } diff --git a/source/libvpx/vp8/decoder/decodemv.c b/source/libvpx/vp8/decoder/decodemv.c index 4327d3b..35a22c7 100644 --- a/source/libvpx/vp8/decoder/decodemv.c +++ b/source/libvpx/vp8/decoder/decodemv.c @@ -110,8 +110,8 @@ static int read_mvcomponent(vp8_reader *r, const MV_CONTEXT *mvc) static void read_mv(vp8_reader *r, MV *mv, const MV_CONTEXT *mvc) { - mv->row = (short)(read_mvcomponent(r, mvc) << 1); - mv->col = (short)(read_mvcomponent(r, ++mvc) << 1); + mv->row = (short)(read_mvcomponent(r, mvc) * 2); + mv->col = (short)(read_mvcomponent(r, ++mvc) * 2); } @@ -292,9 +292,9 @@ static void decode_split_mv(vp8_reader *const bc, MODE_INFO *mi, blockmv.as_int = 0; if( vp8_read(bc, prob[2]) ) { - blockmv.as_mv.row = read_mvcomponent(bc, &mvc[0]) << 1; + blockmv.as_mv.row = read_mvcomponent(bc, &mvc[0]) * 2; blockmv.as_mv.row += best_mv.as_mv.row; - blockmv.as_mv.col = read_mvcomponent(bc, &mvc[1]) << 1; + blockmv.as_mv.col = read_mvcomponent(bc, &mvc[1]) * 2; blockmv.as_mv.col += best_mv.as_mv.col; } } diff --git a/source/libvpx/vp8/decoder/decodframe.c b/source/libvpx/vp8/decoder/decodframe.c index 0050c11..16da78a 100644 --- a/source/libvpx/vp8/decoder/decodframe.c +++ b/source/libvpx/vp8/decoder/decodframe.c @@ -211,7 +211,7 @@ static void decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd, (b->qcoeff[0] * DQC[0], dst, dst_stride, dst, dst_stride); - ((int *)b->qcoeff)[0] = 0; + vpx_memset(b->qcoeff, 0, 2 * sizeof(b->qcoeff[0])); } } } @@ -248,21 +248,14 @@ static void decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd, vp8_short_inv_walsh4x4(&b->dqcoeff[0], xd->qcoeff); - ((int *)b->qcoeff)[0] = 0; - ((int *)b->qcoeff)[1] = 0; - ((int *)b->qcoeff)[2] = 0; - ((int *)b->qcoeff)[3] = 0; - ((int *)b->qcoeff)[4] = 0; - ((int *)b->qcoeff)[5] = 0; - ((int *)b->qcoeff)[6] = 0; - ((int *)b->qcoeff)[7] = 0; + vpx_memset(b->qcoeff, 0, 16 * sizeof(b->qcoeff[0])); } else { b->dqcoeff[0] = b->qcoeff[0] * xd->dequant_y2[0]; vp8_short_inv_walsh4x4_1(&b->dqcoeff[0], xd->qcoeff); - ((int *)b->qcoeff)[0] = 0; + vpx_memset(b->qcoeff, 0, 2 * sizeof(b->qcoeff[0])); } /* override the dc dequant constant in order to preserve the @@ -576,7 +569,7 @@ static void decode_mb_rows(VP8D_COMP *pbi) xd->left_available = 0; - xd->mb_to_top_edge = -((mb_row * 16)) << 3; + xd->mb_to_top_edge = -((mb_row * 16) << 3); xd->mb_to_bottom_edge = ((pc->mb_rows - 1 - mb_row) * 16) << 3; xd->recon_above[0] = dst_buffer[0] + recon_yoffset; diff --git a/source/libvpx/vp8/decoder/threading.c b/source/libvpx/vp8/decoder/threading.c index 7303189..fe290cf 100644 --- a/source/libvpx/vp8/decoder/threading.c +++ b/source/libvpx/vp8/decoder/threading.c @@ -227,7 +227,7 @@ static void mt_decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd, { vp8_dc_only_idct_add(b->qcoeff[0] * DQC[0], dst, dst_stride, dst, dst_stride); - ((int *)b->qcoeff)[0] = 0; + vpx_memset(b->qcoeff, 0, 2 * sizeof(b->qcoeff[0])); } } } @@ -264,21 +264,14 @@ static void mt_decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd, vp8_short_inv_walsh4x4(&b->dqcoeff[0], xd->qcoeff); - ((int *)b->qcoeff)[0] = 0; - ((int *)b->qcoeff)[1] = 0; - ((int *)b->qcoeff)[2] = 0; - ((int *)b->qcoeff)[3] = 0; - ((int *)b->qcoeff)[4] = 0; - ((int *)b->qcoeff)[5] = 0; - ((int *)b->qcoeff)[6] = 0; - ((int *)b->qcoeff)[7] = 0; + vpx_memset(b->qcoeff, 0, 16 * sizeof(b->qcoeff[0])); } else { b->dqcoeff[0] = b->qcoeff[0] * xd->dequant_y2[0]; vp8_short_inv_walsh4x4_1(&b->dqcoeff[0], xd->qcoeff); - ((int *)b->qcoeff)[0] = 0; + vpx_memset(b->qcoeff, 0, 2 * sizeof(b->qcoeff[0])); } /* override the dc dequant constant in order to preserve the diff --git a/source/libvpx/vp8/encoder/bitstream.c b/source/libvpx/vp8/encoder/bitstream.c index 5f0c1f7..78e54e2 100644 --- a/source/libvpx/vp8/encoder/bitstream.c +++ b/source/libvpx/vp8/encoder/bitstream.c @@ -432,7 +432,7 @@ static void write_mv_ref assert(NEARESTMV <= m && m <= SPLITMV); #endif vp8_write_token(w, vp8_mv_ref_tree, p, - vp8_mv_ref_encoding_array - NEARESTMV + m); + vp8_mv_ref_encoding_array + (m - NEARESTMV)); } static void write_sub_mv_ref @@ -444,7 +444,7 @@ static void write_sub_mv_ref assert(LEFT4X4 <= m && m <= NEW4X4); #endif vp8_write_token(w, vp8_sub_mv_ref_tree, p, - vp8_sub_mv_ref_encoding_array - LEFT4X4 + m); + vp8_sub_mv_ref_encoding_array + (m - LEFT4X4)); } static void write_mv @@ -577,7 +577,7 @@ static void pack_inter_mode_mvs(VP8_COMP *const cpi) */ xd->mb_to_left_edge = -((mb_col * 16) << 3); xd->mb_to_right_edge = ((pc->mb_cols - 1 - mb_col) * 16) << 3; - xd->mb_to_top_edge = -((mb_row * 16)) << 3; + xd->mb_to_top_edge = -((mb_row * 16) << 3); xd->mb_to_bottom_edge = ((pc->mb_rows - 1 - mb_row) * 16) << 3; #ifdef VP8_ENTROPY_STATS @@ -1062,7 +1062,7 @@ int vp8_update_coef_context(VP8_COMP *cpi) if (cpi->common.frame_type == KEY_FRAME) { /* Reset to default counts/probabilities at key frames */ - vp8_copy(cpi->coef_counts, default_coef_counts); + vp8_copy(cpi->mb.coef_counts, default_coef_counts); } if (cpi->oxcf.error_resilient_mode & VPX_ERROR_RESILIENT_PARTITIONS) diff --git a/source/libvpx/vp8/encoder/dct.c b/source/libvpx/vp8/encoder/dct.c index b5a11ae..091554a 100644 --- a/source/libvpx/vp8/encoder/dct.c +++ b/source/libvpx/vp8/encoder/dct.c @@ -20,10 +20,10 @@ void vp8_short_fdct4x4_c(short *input, short *output, int pitch) for (i = 0; i < 4; i++) { - a1 = ((ip[0] + ip[3])<<3); - b1 = ((ip[1] + ip[2])<<3); - c1 = ((ip[1] - ip[2])<<3); - d1 = ((ip[0] - ip[3])<<3); + a1 = ((ip[0] + ip[3]) * 8); + b1 = ((ip[1] + ip[2]) * 8); + c1 = ((ip[1] - ip[2]) * 8); + d1 = ((ip[0] - ip[3]) * 8); op[0] = a1 + b1; op[2] = a1 - b1; @@ -72,10 +72,10 @@ void vp8_short_walsh4x4_c(short *input, short *output, int pitch) for (i = 0; i < 4; i++) { - a1 = ((ip[0] + ip[2])<<2); - d1 = ((ip[1] + ip[3])<<2); - c1 = ((ip[1] - ip[3])<<2); - b1 = ((ip[0] - ip[2])<<2); + a1 = ((ip[0] + ip[2]) * 4); + d1 = ((ip[1] + ip[3]) * 4); + c1 = ((ip[1] - ip[3]) * 4); + b1 = ((ip[0] - ip[2]) * 4); op[0] = a1 + d1 + (a1!=0); op[1] = b1 + c1; diff --git a/source/libvpx/vp8/encoder/firstpass.c b/source/libvpx/vp8/encoder/firstpass.c index ded0c43..968c7f3 100644 --- a/source/libvpx/vp8/encoder/firstpass.c +++ b/source/libvpx/vp8/encoder/firstpass.c @@ -711,8 +711,8 @@ skip_motion_search: neutral_count++; } - d->bmi.mv.as_mv.row <<= 3; - d->bmi.mv.as_mv.col <<= 3; + d->bmi.mv.as_mv.row *= 8; + d->bmi.mv.as_mv.col *= 8; this_error = motion_error; vp8_set_mbmode_and_mvs(x, NEWMV, &d->bmi.mv); vp8_encode_inter16x16y(x); @@ -909,13 +909,16 @@ extern const int vp8_bits_per_mb[2][QINDEX_RANGE]; static double bitcost( double prob ) { - return -(log( prob ) / log( 2.0 )); + if (prob > 0.000122) + return -log(prob) / log(2.0); + else + return 13.0; } static int64_t estimate_modemvcost(VP8_COMP *cpi, FIRSTPASS_STATS * fpstats) { int mv_cost; - int mode_cost; + int64_t mode_cost; double av_pct_inter = fpstats->pcnt_inter / fpstats->count; double av_pct_motion = fpstats->pcnt_motion / fpstats->count; @@ -937,10 +940,9 @@ static int64_t estimate_modemvcost(VP8_COMP *cpi, /* Crude estimate of overhead cost from modes * << 9 is the normalization to (bits * 512) used in vp8_bits_per_mb */ - mode_cost = - (int)( ( ((av_pct_inter - av_pct_motion) * zz_cost) + - (av_pct_motion * motion_cost) + - (av_intra * intra_cost) ) * cpi->common.MBs ) << 9; + mode_cost =((((av_pct_inter - av_pct_motion) * zz_cost) + + (av_pct_motion * motion_cost) + + (av_intra * intra_cost)) * cpi->common.MBs) * 512; return mv_cost + mode_cost; } diff --git a/source/libvpx/vp8/encoder/mcomp.c b/source/libvpx/vp8/encoder/mcomp.c index 83c3989..0b11ea6 100644 --- a/source/libvpx/vp8/encoder/mcomp.c +++ b/source/libvpx/vp8/encoder/mcomp.c @@ -210,7 +210,7 @@ int vp8_find_best_sub_pixel_step_iteratively(MACROBLOCK *x, BLOCK *b, BLOCKD *d, unsigned char *z = (*(b->base_src) + b->src); int rr = ref_mv->as_mv.row >> 1, rc = ref_mv->as_mv.col >> 1; - int br = bestmv->as_mv.row << 2, bc = bestmv->as_mv.col << 2; + int br = bestmv->as_mv.row * 4, bc = bestmv->as_mv.col * 4; int tr = br, tc = bc; unsigned int besterr; unsigned int left, right, up, down, diag; @@ -220,10 +220,14 @@ int vp8_find_best_sub_pixel_step_iteratively(MACROBLOCK *x, BLOCK *b, BLOCKD *d, unsigned int quarteriters = 4; int thismse; - int minc = MAX(x->mv_col_min << 2, (ref_mv->as_mv.col >> 1) - ((1 << mvlong_width) - 1)); - int maxc = MIN(x->mv_col_max << 2, (ref_mv->as_mv.col >> 1) + ((1 << mvlong_width) - 1)); - int minr = MAX(x->mv_row_min << 2, (ref_mv->as_mv.row >> 1) - ((1 << mvlong_width) - 1)); - int maxr = MIN(x->mv_row_max << 2, (ref_mv->as_mv.row >> 1) + ((1 << mvlong_width) - 1)); + int minc = MAX(x->mv_col_min * 4, + (ref_mv->as_mv.col >> 1) - ((1 << mvlong_width) - 1)); + int maxc = MIN(x->mv_col_max * 4, + (ref_mv->as_mv.col >> 1) + ((1 << mvlong_width) - 1)); + int minr = MAX(x->mv_row_min * 4, + (ref_mv->as_mv.row >> 1) - ((1 << mvlong_width) - 1)); + int maxr = MIN(x->mv_row_max * 4, + (ref_mv->as_mv.row >> 1) + ((1 << mvlong_width) - 1)); int y_stride; int offset; @@ -254,8 +258,8 @@ int vp8_find_best_sub_pixel_step_iteratively(MACROBLOCK *x, BLOCK *b, BLOCKD *d, offset = (bestmv->as_mv.row) * y_stride + bestmv->as_mv.col; /* central mv */ - bestmv->as_mv.row <<= 3; - bestmv->as_mv.col <<= 3; + bestmv->as_mv.row *= 8; + bestmv->as_mv.col *= 8; /* calculate central point error */ besterr = vfp->vf(y, y_stride, z, b->src_stride, sse1); @@ -337,8 +341,8 @@ int vp8_find_best_sub_pixel_step_iteratively(MACROBLOCK *x, BLOCK *b, BLOCKD *d, tc = bc; } - bestmv->as_mv.row = br << 1; - bestmv->as_mv.col = bc << 1; + bestmv->as_mv.row = br * 2; + bestmv->as_mv.col = bc * 2; if ((abs(bestmv->as_mv.col - ref_mv->as_mv.col) > (MAX_FULL_PEL_VAL<<3)) || (abs(bestmv->as_mv.row - ref_mv->as_mv.row) > (MAX_FULL_PEL_VAL<<3))) @@ -699,8 +703,8 @@ int vp8_find_best_half_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d, #endif /* central mv */ - bestmv->as_mv.row <<= 3; - bestmv->as_mv.col <<= 3; + bestmv->as_mv.row *= 8; + bestmv->as_mv.col *= 8; startmv = *bestmv; /* calculate central point error */ @@ -1315,8 +1319,8 @@ int vp8_diamond_search_sadx4 (*num00)++; } - this_mv.as_mv.row = best_mv->as_mv.row << 3; - this_mv.as_mv.col = best_mv->as_mv.col << 3; + this_mv.as_mv.row = best_mv->as_mv.row * 8; + this_mv.as_mv.col = best_mv->as_mv.col * 8; return fn_ptr->vf(what, what_stride, best_address, in_what_stride, &thissad) + mv_err_cost(&this_mv, center_mv, mvcost, x->errorperbit); @@ -1709,8 +1713,8 @@ int vp8_full_search_sadx8(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv, } } - this_mv.as_mv.row = best_mv->as_mv.row << 3; - this_mv.as_mv.col = best_mv->as_mv.col << 3; + this_mv.as_mv.row = best_mv->as_mv.row * 8; + this_mv.as_mv.col = best_mv->as_mv.col * 8; return fn_ptr->vf(what, what_stride, bestaddress, in_what_stride, &thissad) + mv_err_cost(&this_mv, center_mv, mvcost, x->errorperbit); @@ -1905,8 +1909,8 @@ int vp8_refining_search_sadx4(MACROBLOCK *x, BLOCK *b, BLOCKD *d, } } - this_mv.as_mv.row = ref_mv->as_mv.row << 3; - this_mv.as_mv.col = ref_mv->as_mv.col << 3; + this_mv.as_mv.row = ref_mv->as_mv.row * 8; + this_mv.as_mv.col = ref_mv->as_mv.col * 8; return fn_ptr->vf(what, what_stride, best_address, in_what_stride, &thissad) + mv_err_cost(&this_mv, center_mv, mvcost, x->errorperbit); diff --git a/source/libvpx/vp8/encoder/rdopt.c b/source/libvpx/vp8/encoder/rdopt.c index 521e84f..5016cc4 100644 --- a/source/libvpx/vp8/encoder/rdopt.c +++ b/source/libvpx/vp8/encoder/rdopt.c @@ -935,7 +935,7 @@ int vp8_cost_mv_ref(MB_PREDICTION_MODE m, const int near_mv_ref_ct[4]) assert(NEARESTMV <= m && m <= SPLITMV); vp8_mv_ref_probs(p, near_mv_ref_ct); return vp8_cost_token(vp8_mv_ref_tree, p, - vp8_mv_ref_encoding_array - NEARESTMV + m); + vp8_mv_ref_encoding_array + (m - NEARESTMV)); } void vp8_set_mbmode_and_mvs(MACROBLOCK *x, MB_PREDICTION_MODE mb, int_mv *mv) diff --git a/source/libvpx/vp9/decoder/arm/neon/vp9_add_constant_residual_neon.asm b/source/libvpx/vp9/decoder/arm/neon/vp9_add_constant_residual_neon.asm deleted file mode 100644 index 174e747..0000000 --- a/source/libvpx/vp9/decoder/arm/neon/vp9_add_constant_residual_neon.asm +++ /dev/null @@ -1,230 +0,0 @@ -; -; Copyright (c) 2013 The WebM project authors. All Rights Reserved. -; -; Use of this source code is governed by a BSD-style license -; that can be found in the LICENSE file in the root of the source -; tree. An additional intellectual property rights grant can be found -; in the file PATENTS. All contributing project authors may -; be found in the AUTHORS file in the root of the source tree. -; - - EXPORT |vp9_add_constant_residual_8x8_neon| - EXPORT |vp9_add_constant_residual_16x16_neon| - EXPORT |vp9_add_constant_residual_32x32_neon| - ARM - - AREA ||.text||, CODE, READONLY, ALIGN=2 - - MACRO - LD_16x8 $src, $stride - vld1.8 {q8}, [$src], $stride - vld1.8 {q9}, [$src], $stride - vld1.8 {q10}, [$src], $stride - vld1.8 {q11}, [$src], $stride - vld1.8 {q12}, [$src], $stride - vld1.8 {q13}, [$src], $stride - vld1.8 {q14}, [$src], $stride - vld1.8 {q15}, [$src], $stride - MEND - - MACRO - ADD_DIFF_16x8 $diff - vqadd.u8 q8, q8, $diff - vqadd.u8 q9, q9, $diff - vqadd.u8 q10, q10, $diff - vqadd.u8 q11, q11, $diff - vqadd.u8 q12, q12, $diff - vqadd.u8 q13, q13, $diff - vqadd.u8 q14, q14, $diff - vqadd.u8 q15, q15, $diff - MEND - - MACRO - SUB_DIFF_16x8 $diff - vqsub.u8 q8, q8, $diff - vqsub.u8 q9, q9, $diff - vqsub.u8 q10, q10, $diff - vqsub.u8 q11, q11, $diff - vqsub.u8 q12, q12, $diff - vqsub.u8 q13, q13, $diff - vqsub.u8 q14, q14, $diff - vqsub.u8 q15, q15, $diff - MEND - - MACRO - ST_16x8 $dst, $stride - vst1.8 {q8}, [$dst], $stride - vst1.8 {q9}, [$dst], $stride - vst1.8 {q10}, [$dst], $stride - vst1.8 {q11}, [$dst], $stride - vst1.8 {q12}, [$dst], $stride - vst1.8 {q13}, [$dst], $stride - vst1.8 {q14}, [$dst], $stride - vst1.8 {q15}, [$dst], $stride - MEND - -; void add_constant_residual(const int16_t diff, uint8_t *dest, int stride, -; int width, int height) { -; int r, c; -; -; for (r = 0; r < height; r++) { -; for (c = 0; c < width; c++) -; dest[c] = clip_pixel(diff + dest[c]); -; -; dest += stride; -; } -;} -;void vp9_add_constant_residual_8x8_c(const int16_t diff, uint8_t *dest, -; int stride) { -; add_constant_residual(diff, dest, stride, 8, 8); -;} -; r0 : const int16_t diff -; r1 : const uint8_t *dest -; r2 : int stride -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -|vp9_add_constant_residual_8x8_neon| PROC - mov r3, r1 ; r3: save dest to r3 - vld1.8 {d0}, [r1], r2 - vld1.8 {d1}, [r1], r2 - vld1.8 {d2}, [r1], r2 - vld1.8 {d3}, [r1], r2 - vld1.8 {d4}, [r1], r2 - vld1.8 {d5}, [r1], r2 - vld1.8 {d6}, [r1], r2 - vld1.8 {d7}, [r1], r2 - cmp r0, #0 - bge DIFF_POSITIVE_8x8 - -DIFF_NEGATIVE_8x8 ; diff < 0 - neg r0, r0 - usat r0, #8, r0 - vdup.u8 q8, r0 - - vqsub.u8 q0, q0, q8 - vqsub.u8 q1, q1, q8 - vqsub.u8 q2, q2, q8 - vqsub.u8 q3, q3, q8 - b DIFF_SAVE_8x8 - -DIFF_POSITIVE_8x8 ; diff >= 0 - usat r0, #8, r0 - vdup.u8 q8, r0 - - vqadd.u8 q0, q0, q8 - vqadd.u8 q1, q1, q8 - vqadd.u8 q2, q2, q8 - vqadd.u8 q3, q3, q8 - -DIFF_SAVE_8x8 - vst1.8 {d0}, [r3], r2 - vst1.8 {d1}, [r3], r2 - vst1.8 {d2}, [r3], r2 - vst1.8 {d3}, [r3], r2 - vst1.8 {d4}, [r3], r2 - vst1.8 {d5}, [r3], r2 - vst1.8 {d6}, [r3], r2 - vst1.8 {d7}, [r3], r2 - - bx lr - ENDP - -;void vp9_add_constant_residual_16x16_c(const int16_t diff, uint8_t *dest, -; int stride) { -; add_constant_residual(diff, dest, stride, 16, 16); -;} -; r0 : const int16_t diff -; r1 : const uint8_t *dest -; r2 : int stride -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -|vp9_add_constant_residual_16x16_neon| PROC - mov r3, r1 - LD_16x8 r1, r2 - cmp r0, #0 - bge DIFF_POSITIVE_16x16 - -|DIFF_NEGATIVE_16x16| - neg r0, r0 - usat r0, #8, r0 - vdup.u8 q0, r0 - - SUB_DIFF_16x8 q0 - ST_16x8 r3, r2 - LD_16x8 r1, r2 - SUB_DIFF_16x8 q0 - b DIFF_SAVE_16x16 - -|DIFF_POSITIVE_16x16| - usat r0, #8, r0 - vdup.u8 q0, r0 - - ADD_DIFF_16x8 q0 - ST_16x8 r3, r2 - LD_16x8 r1, r2 - ADD_DIFF_16x8 q0 - -|DIFF_SAVE_16x16| - ST_16x8 r3, r2 - bx lr - ENDP - -;void vp9_add_constant_residual_32x32_c(const int16_t diff, uint8_t *dest, -; int stride) { -; add_constant_residual(diff, dest, stride, 32, 32); -;} -; r0 : const int16_t diff -; r1 : const uint8_t *dest -; r2 : int stride -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -|vp9_add_constant_residual_32x32_neon| PROC - push {r4,lr} - pld [r1] - mov r3, r1 - add r4, r1, #16 ; r4 dest + 16 for second loop - cmp r0, #0 - bge DIFF_POSITIVE_32x32 - -|DIFF_NEGATIVE_32x32| - neg r0, r0 - usat r0, #8, r0 - vdup.u8 q0, r0 - mov r0, #4 - -|DIFF_NEGATIVE_32x32_LOOP| - sub r0, #1 - LD_16x8 r1, r2 - SUB_DIFF_16x8 q0 - ST_16x8 r3, r2 - - LD_16x8 r1, r2 - SUB_DIFF_16x8 q0 - ST_16x8 r3, r2 - cmp r0, #2 - moveq r1, r4 - moveq r3, r4 - cmp r0, #0 - bne DIFF_NEGATIVE_32x32_LOOP - pop {r4,pc} - -|DIFF_POSITIVE_32x32| - usat r0, #8, r0 - vdup.u8 q0, r0 - mov r0, #4 - -|DIFF_POSITIVE_32x32_LOOP| - sub r0, #1 - LD_16x8 r1, r2 - ADD_DIFF_16x8 q0 - ST_16x8 r3, r2 - - LD_16x8 r1, r2 - ADD_DIFF_16x8 q0 - ST_16x8 r3, r2 - cmp r0, #2 - moveq r1, r4 - moveq r3, r4 - cmp r0, #0 - bne DIFF_POSITIVE_32x32_LOOP - pop {r4,pc} - ENDP - - END diff --git a/source/libvpx/vp9/decoder/vp9_decodemv.c b/source/libvpx/vp9/decoder/vp9_decodemv.c index 580dc4c..475a299 100644 --- a/source/libvpx/vp9/decoder/vp9_decodemv.c +++ b/source/libvpx/vp9/decoder/vp9_decodemv.c @@ -23,7 +23,6 @@ #include "vp9/decoder/vp9_decodemv.h" #include "vp9/decoder/vp9_decodframe.h" #include "vp9/decoder/vp9_onyxd_int.h" -#include "vp9/decoder/vp9_dsubexp.h" #include "vp9/decoder/vp9_treereader.h" static MB_PREDICTION_MODE read_intra_mode(vp9_reader *r, const vp9_prob *p) { @@ -34,7 +33,8 @@ static MB_PREDICTION_MODE read_intra_mode_y(VP9_COMMON *cm, vp9_reader *r, int size_group) { const MB_PREDICTION_MODE y_mode = read_intra_mode(r, cm->fc.y_mode_prob[size_group]); - ++cm->counts.y_mode[size_group][y_mode]; + if (!cm->frame_parallel_decoding_mode) + ++cm->counts.y_mode[size_group][y_mode]; return y_mode; } @@ -42,7 +42,8 @@ static MB_PREDICTION_MODE read_intra_mode_uv(VP9_COMMON *cm, vp9_reader *r, MB_PREDICTION_MODE y_mode) { const MB_PREDICTION_MODE uv_mode = read_intra_mode(r, cm->fc.uv_mode_prob[y_mode]); - ++cm->counts.uv_mode[y_mode][uv_mode]; + if (!cm->frame_parallel_decoding_mode) + ++cm->counts.uv_mode[y_mode][uv_mode]; return uv_mode; } @@ -50,7 +51,8 @@ static MB_PREDICTION_MODE read_inter_mode(VP9_COMMON *cm, vp9_reader *r, uint8_t context) { const MB_PREDICTION_MODE mode = treed_read(r, vp9_inter_mode_tree, cm->fc.inter_mode_probs[context]); - ++cm->counts.inter_mode[context][inter_mode_offset(mode)]; + if (!cm->frame_parallel_decoding_mode) + ++cm->counts.inter_mode[context][inter_mode_offset(mode)]; return mode; } @@ -69,26 +71,21 @@ static TX_SIZE read_selected_tx_size(VP9_COMMON *cm, MACROBLOCKD *xd, tx_size += vp9_read(r, tx_probs[2]); } - update_tx_counts(bsize, context, tx_size, &cm->counts.tx); + if (!cm->frame_parallel_decoding_mode) + ++get_tx_counts(bsize, context, &cm->counts.tx)[tx_size]; return tx_size; } -static TX_SIZE read_tx_size(VP9D_COMP *pbi, TX_MODE tx_mode, - BLOCK_SIZE bsize, int allow_select, +static TX_SIZE read_tx_size(VP9_COMMON *const cm, MACROBLOCKD *const xd, + TX_MODE tx_mode, BLOCK_SIZE bsize, int allow_select, vp9_reader *r) { - VP9_COMMON *const cm = &pbi->common; - MACROBLOCKD *const xd = &pbi->mb; - - if (allow_select && tx_mode == TX_MODE_SELECT && bsize >= BLOCK_8X8) + if (allow_select && tx_mode == TX_MODE_SELECT && bsize >= BLOCK_8X8) { return read_selected_tx_size(cm, xd, bsize, r); - else if (tx_mode >= ALLOW_32X32 && bsize >= BLOCK_32X32) - return TX_32X32; - else if (tx_mode >= ALLOW_16X16 && bsize >= BLOCK_16X16) - return TX_16X16; - else if (tx_mode >= ALLOW_8X8 && bsize >= BLOCK_8X8) - return TX_8X8; - else - return TX_4X4; + } else { + const TX_SIZE max_tx_size_block = max_txsize_lookup[bsize]; + const TX_SIZE max_tx_size_txmode = tx_mode_to_biggest_tx_size[tx_mode]; + return MIN(max_tx_size_block, max_tx_size_txmode); + } } static void set_segment_id(VP9_COMMON *cm, BLOCK_SIZE bsize, @@ -107,11 +104,11 @@ static void set_segment_id(VP9_COMMON *cm, BLOCK_SIZE bsize, cm->last_frame_seg_map[mi_offset + y * cm->mi_cols + x] = segment_id; } -static int read_intra_segment_id(VP9D_COMP *pbi, int mi_row, int mi_col, +static int read_intra_segment_id(VP9_COMMON *const cm, MACROBLOCKD *const xd, + int mi_row, int mi_col, vp9_reader *r) { - MACROBLOCKD *const xd = &pbi->mb; - struct segmentation *const seg = &pbi->common.seg; - const BLOCK_SIZE bsize = xd->this_mi->mbmi.sb_type; + struct segmentation *const seg = &cm->seg; + const BLOCK_SIZE bsize = xd->mi_8x8[0]->mbmi.sb_type; int segment_id; if (!seg->enabled) @@ -121,16 +118,14 @@ static int read_intra_segment_id(VP9D_COMP *pbi, int mi_row, int mi_col, return 0; segment_id = read_segment_id(r, seg); - set_segment_id(&pbi->common, bsize, mi_row, mi_col, segment_id); + set_segment_id(cm, bsize, mi_row, mi_col, segment_id); return segment_id; } -static int read_inter_segment_id(VP9D_COMP *pbi, int mi_row, int mi_col, - vp9_reader *r) { - VP9_COMMON *const cm = &pbi->common; - MACROBLOCKD *const xd = &pbi->mb; +static int read_inter_segment_id(VP9_COMMON *const cm, MACROBLOCKD *const xd, + int mi_row, int mi_col, vp9_reader *r) { struct segmentation *const seg = &cm->seg; - const BLOCK_SIZE bsize = xd->this_mi->mbmi.sb_type; + const BLOCK_SIZE bsize = xd->mi_8x8[0]->mbmi.sb_type; int pred_segment_id, segment_id; if (!seg->enabled) @@ -154,37 +149,36 @@ static int read_inter_segment_id(VP9D_COMP *pbi, int mi_row, int mi_col, return segment_id; } -static uint8_t read_skip_coeff(VP9D_COMP *pbi, int segment_id, vp9_reader *r) { - VP9_COMMON *const cm = &pbi->common; - MACROBLOCKD *const xd = &pbi->mb; +static uint8_t read_skip_coeff(VP9_COMMON *const cm, MACROBLOCKD *const xd, + int segment_id, vp9_reader *r) { int skip_coeff = vp9_segfeature_active(&cm->seg, segment_id, SEG_LVL_SKIP); if (!skip_coeff) { const int ctx = vp9_get_pred_context_mbskip(xd); skip_coeff = vp9_read(r, vp9_get_pred_prob_mbskip(cm, xd)); - cm->counts.mbskip[ctx][skip_coeff]++; + if (!cm->frame_parallel_decoding_mode) + ++cm->counts.mbskip[ctx][skip_coeff]; } return skip_coeff; } -static void read_intra_frame_mode_info(VP9D_COMP *pbi, MODE_INFO *m, +static void read_intra_frame_mode_info(VP9_COMMON *const cm, + MACROBLOCKD *const xd, + MODE_INFO *const m, int mi_row, int mi_col, vp9_reader *r) { - VP9_COMMON *const cm = &pbi->common; - MACROBLOCKD *const xd = &pbi->mb; MB_MODE_INFO *const mbmi = &m->mbmi; const BLOCK_SIZE bsize = mbmi->sb_type; const MODE_INFO *above_mi = xd->mi_8x8[-cm->mode_info_stride]; - const MODE_INFO *left_mi = xd->mi_8x8[-1]; + const MODE_INFO *left_mi = xd->left_available ? xd->mi_8x8[-1] : NULL; - mbmi->segment_id = read_intra_segment_id(pbi, mi_row, mi_col, r); - mbmi->skip_coeff = read_skip_coeff(pbi, mbmi->segment_id, r); - mbmi->tx_size = read_tx_size(pbi, cm->tx_mode, bsize, 1, r); + mbmi->segment_id = read_intra_segment_id(cm, xd, mi_row, mi_col, r); + mbmi->skip_coeff = read_skip_coeff(cm, xd, mbmi->segment_id, r); + mbmi->tx_size = read_tx_size(cm, xd, cm->tx_mode, bsize, 1, r); mbmi->ref_frame[0] = INTRA_FRAME; mbmi->ref_frame[1] = NONE; if (bsize >= BLOCK_8X8) { const MB_PREDICTION_MODE A = above_block_mode(m, above_mi, 0); - const MB_PREDICTION_MODE L = xd->left_available ? - left_block_mode(m, left_mi, 0) : DC_PRED; + const MB_PREDICTION_MODE L = left_block_mode(m, left_mi, 0); mbmi->mode = read_intra_mode(r, vp9_kf_y_mode_prob[A][L]); } else { // Only 4x4, 4x8, 8x4 blocks @@ -196,8 +190,7 @@ static void read_intra_frame_mode_info(VP9D_COMP *pbi, MODE_INFO *m, for (idx = 0; idx < 2; idx += num_4x4_w) { const int ib = idy * 2 + idx; const MB_PREDICTION_MODE A = above_block_mode(m, above_mi, ib); - const MB_PREDICTION_MODE L = (xd->left_available || idx) ? - left_block_mode(m, left_mi, ib) : DC_PRED; + const MB_PREDICTION_MODE L = left_block_mode(m, left_mi, ib); const MB_PREDICTION_MODE b_mode = read_intra_mode(r, vp9_kf_y_mode_prob[A][L]); m->bmi[ib].as_mode = b_mode; @@ -216,7 +209,6 @@ static void read_intra_frame_mode_info(VP9D_COMP *pbi, MODE_INFO *m, static int read_mv_component(vp9_reader *r, const nmv_component *mvcomp, int usehp) { - int mag, d, fr, hp; const int sign = vp9_read(r, mvcomp->sign); const int mv_class = treed_read(r, vp9_mv_class_tree, mvcomp->classes); @@ -267,56 +259,10 @@ static INLINE void read_mv(vp9_reader *r, MV *mv, const MV *ref, mv->col = ref->col + diff.col; } -static void update_mv(vp9_reader *r, vp9_prob *p) { - if (vp9_read(r, NMV_UPDATE_PROB)) - *p = (vp9_read_literal(r, 7) << 1) | 1; -} - -static void read_mv_probs(vp9_reader *r, nmv_context *mvc, int allow_hp) { - int i, j, k; - - for (j = 0; j < MV_JOINTS - 1; ++j) - update_mv(r, &mvc->joints[j]); - - for (i = 0; i < 2; ++i) { - nmv_component *const comp = &mvc->comps[i]; - - update_mv(r, &comp->sign); - - for (j = 0; j < MV_CLASSES - 1; ++j) - update_mv(r, &comp->classes[j]); - - for (j = 0; j < CLASS0_SIZE - 1; ++j) - update_mv(r, &comp->class0[j]); - - for (j = 0; j < MV_OFFSET_BITS; ++j) - update_mv(r, &comp->bits[j]); - } - - for (i = 0; i < 2; ++i) { - nmv_component *const comp = &mvc->comps[i]; - - for (j = 0; j < CLASS0_SIZE; ++j) - for (k = 0; k < 3; ++k) - update_mv(r, &comp->class0_fp[j][k]); - - for (j = 0; j < 3; ++j) - update_mv(r, &comp->fp[j]); - } - - if (allow_hp) { - for (i = 0; i < 2; ++i) { - update_mv(r, &mvc->comps[i].class0_hp); - update_mv(r, &mvc->comps[i].hp); - } - } -} - // Read the referncence frame -static void read_ref_frames(VP9D_COMP *pbi, vp9_reader *r, +static void read_ref_frames(VP9_COMMON *const cm, MACROBLOCKD *const xd, + vp9_reader *r, int segment_id, MV_REFERENCE_FRAME ref_frame[2]) { - VP9_COMMON *const cm = &pbi->common; - MACROBLOCKD *const xd = &pbi->mb; FRAME_CONTEXT *const fc = &cm->fc; FRAME_COUNTS *const counts = &cm->counts; @@ -329,7 +275,8 @@ static void read_ref_frames(VP9D_COMP *pbi, vp9_reader *r, if (cm->comp_pred_mode == HYBRID_PREDICTION) { is_comp = vp9_read(r, fc->comp_inter_prob[comp_ctx]); - counts->comp_inter[comp_ctx][is_comp]++; + if (!cm->frame_parallel_decoding_mode) + ++counts->comp_inter[comp_ctx][is_comp]; } else { is_comp = cm->comp_pred_mode == COMP_PREDICTION_ONLY; } @@ -339,18 +286,21 @@ static void read_ref_frames(VP9D_COMP *pbi, vp9_reader *r, const int fix_ref_idx = cm->ref_frame_sign_bias[cm->comp_fixed_ref]; const int ref_ctx = vp9_get_pred_context_comp_ref_p(cm, xd); const int b = vp9_read(r, fc->comp_ref_prob[ref_ctx]); - counts->comp_ref[ref_ctx][b]++; + if (!cm->frame_parallel_decoding_mode) + ++counts->comp_ref[ref_ctx][b]; ref_frame[fix_ref_idx] = cm->comp_fixed_ref; ref_frame[!fix_ref_idx] = cm->comp_var_ref[b]; } else { const int ctx0 = vp9_get_pred_context_single_ref_p1(xd); const int bit0 = vp9_read(r, fc->single_ref_prob[ctx0][0]); - ++counts->single_ref[ctx0][0][bit0]; + if (!cm->frame_parallel_decoding_mode) + ++counts->single_ref[ctx0][0][bit0]; if (bit0) { const int ctx1 = vp9_get_pred_context_single_ref_p2(xd); const int bit1 = vp9_read(r, fc->single_ref_prob[ctx1][1]); ref_frame[0] = bit1 ? ALTREF_FRAME : GOLDEN_FRAME; - ++counts->single_ref[ctx1][1][bit1]; + if (!cm->frame_parallel_decoding_mode) + ++counts->single_ref[ctx1][1][bit1]; } else { ref_frame[0] = LAST_FRAME; } @@ -360,42 +310,19 @@ static void read_ref_frames(VP9D_COMP *pbi, vp9_reader *r, } } -static void read_switchable_interp_probs(FRAME_CONTEXT *fc, vp9_reader *r) { - int i, j; - for (j = 0; j < SWITCHABLE_FILTERS + 1; ++j) - for (i = 0; i < SWITCHABLE_FILTERS - 1; ++i) - vp9_diff_update_prob(r, MODE_UPDATE_PROB, - &fc->switchable_interp_prob[j][i]); -} - -static void read_inter_mode_probs(FRAME_CONTEXT *fc, vp9_reader *r) { - int i, j; - for (i = 0; i < INTER_MODE_CONTEXTS; ++i) - for (j = 0; j < INTER_MODES - 1; ++j) - vp9_diff_update_prob(r, MODE_UPDATE_PROB, &fc->inter_mode_probs[i][j]); -} - -static INLINE COMPPREDMODE_TYPE read_comp_pred_mode(vp9_reader *r) { - COMPPREDMODE_TYPE mode = vp9_read_bit(r); - if (mode) - mode += vp9_read_bit(r); - return mode; -} -static INLINE INTERPOLATIONFILTERTYPE read_switchable_filter_type( - VP9D_COMP *pbi, vp9_reader *r) { - VP9_COMMON *const cm = &pbi->common; - MACROBLOCKD *const xd = &pbi->mb; +static INLINE INTERPOLATION_TYPE read_switchable_filter_type( + VP9_COMMON *const cm, MACROBLOCKD *const xd, vp9_reader *r) { const int ctx = vp9_get_pred_context_switchable_interp(xd); const int type = treed_read(r, vp9_switchable_interp_tree, cm->fc.switchable_interp_prob[ctx]); - ++cm->counts.switchable_interp[ctx][type]; + if (!cm->frame_parallel_decoding_mode) + ++cm->counts.switchable_interp[ctx][type]; return type; } -static void read_intra_block_mode_info(VP9D_COMP *pbi, MODE_INFO *mi, - vp9_reader *r) { - VP9_COMMON *const cm = &pbi->common; +static void read_intra_block_mode_info(VP9_COMMON *const cm, MODE_INFO *mi, + vp9_reader *r) { MB_MODE_INFO *const mbmi = &mi->mbmi; const BLOCK_SIZE bsize = mi->mbmi.sb_type; @@ -435,60 +362,64 @@ static INLINE int assign_mv(VP9_COMMON *cm, MB_PREDICTION_MODE mode, int ret = 1; switch (mode) { - case NEWMV: - read_mv(r, &mv[0].as_mv, &best_mv[0].as_mv, - &cm->fc.nmvc, &cm->counts.mv, allow_hp); - if (is_compound) - read_mv(r, &mv[1].as_mv, &best_mv[1].as_mv, - &cm->fc.nmvc, &cm->counts.mv, allow_hp); - for (i = 0; i < 1 + is_compound; ++i) { - ret = ret && mv[i].as_mv.row < MV_UPP && mv[i].as_mv.row > MV_LOW; - ret = ret && mv[i].as_mv.col < MV_UPP && mv[i].as_mv.col > MV_LOW; - } - break; - case NEARESTMV: - mv[0].as_int = nearest_mv[0].as_int; + case NEWMV: { + nmv_context_counts *const mv_counts = cm->frame_parallel_decoding_mode ? + NULL : &cm->counts.mv; + read_mv(r, &mv[0].as_mv, &best_mv[0].as_mv, + &cm->fc.nmvc, mv_counts, allow_hp); if (is_compound) - mv[1].as_int = nearest_mv[1].as_int; + read_mv(r, &mv[1].as_mv, &best_mv[1].as_mv, + &cm->fc.nmvc, mv_counts, allow_hp); + for (i = 0; i < 1 + is_compound; ++i) { + ret = ret && mv[i].as_mv.row < MV_UPP && mv[i].as_mv.row > MV_LOW; + ret = ret && mv[i].as_mv.col < MV_UPP && mv[i].as_mv.col > MV_LOW; + } break; - case NEARMV: + } + case NEARESTMV: { + mv[0].as_int = nearest_mv[0].as_int; + if (is_compound) mv[1].as_int = nearest_mv[1].as_int; + break; + } + case NEARMV: { mv[0].as_int = near_mv[0].as_int; - if (is_compound) - mv[1].as_int = near_mv[1].as_int; + if (is_compound) mv[1].as_int = near_mv[1].as_int; break; - case ZEROMV: + } + case ZEROMV: { mv[0].as_int = 0; - if (is_compound) - mv[1].as_int = 0; + if (is_compound) mv[1].as_int = 0; break; - default: + } + default: { return 0; + } } return ret; } -static int read_is_inter_block(VP9D_COMP *pbi, int segment_id, vp9_reader *r) { - VP9_COMMON *const cm = &pbi->common; - MACROBLOCKD *const xd = &pbi->mb; - +static int read_is_inter_block(VP9_COMMON *const cm, MACROBLOCKD *const xd, + int segment_id, vp9_reader *r) { if (vp9_segfeature_active(&cm->seg, segment_id, SEG_LVL_REF_FRAME)) { return vp9_get_segdata(&cm->seg, segment_id, SEG_LVL_REF_FRAME) != INTRA_FRAME; } else { const int ctx = vp9_get_pred_context_intra_inter(xd); const int is_inter = vp9_read(r, vp9_get_pred_prob_intra_inter(cm, xd)); - ++cm->counts.intra_inter[ctx][is_inter]; + if (!cm->frame_parallel_decoding_mode) + ++cm->counts.intra_inter[ctx][is_inter]; return is_inter; } } -static void read_inter_block_mode_info(VP9D_COMP *pbi, MODE_INFO *mi, +static void read_inter_block_mode_info(VP9_COMMON *const cm, + MACROBLOCKD *const xd, + const TileInfo *const tile, + MODE_INFO *const mi, int mi_row, int mi_col, vp9_reader *r) { - VP9_COMMON *const cm = &pbi->common; - MACROBLOCKD *const xd = &pbi->mb; MB_MODE_INFO *const mbmi = &mi->mbmi; const BLOCK_SIZE bsize = mbmi->sb_type; - const int allow_hp = xd->allow_high_precision_mv; + const int allow_hp = cm->allow_high_precision_mv; int_mv nearest[2], nearmv[2], best[2]; uint8_t inter_mode_ctx; @@ -496,11 +427,11 @@ static void read_inter_block_mode_info(VP9D_COMP *pbi, MODE_INFO *mi, int is_compound; mbmi->uv_mode = DC_PRED; - read_ref_frames(pbi, r, mbmi->segment_id, mbmi->ref_frame); + read_ref_frames(cm, xd, r, mbmi->segment_id, mbmi->ref_frame); ref0 = mbmi->ref_frame[0]; is_compound = has_second_ref(mbmi); - vp9_find_mv_refs(cm, xd, mi, xd->last_mi, ref0, mbmi->ref_mvs[ref0], + vp9_find_mv_refs(cm, xd, tile, mi, xd->last_mi, ref0, mbmi->ref_mvs[ref0], mi_row, mi_col); inter_mode_ctx = mbmi->mode_context[ref0]; @@ -519,24 +450,26 @@ static void read_inter_block_mode_info(VP9D_COMP *pbi, MODE_INFO *mi, // nearest, nearby if (bsize < BLOCK_8X8 || mbmi->mode != ZEROMV) { - vp9_find_best_ref_mvs(xd, mbmi->ref_mvs[ref0], &nearest[0], &nearmv[0]); + vp9_find_best_ref_mvs(xd, allow_hp, + mbmi->ref_mvs[ref0], &nearest[0], &nearmv[0]); best[0].as_int = nearest[0].as_int; } if (is_compound) { const MV_REFERENCE_FRAME ref1 = mbmi->ref_frame[1]; - vp9_find_mv_refs(cm, xd, mi, xd->last_mi, + vp9_find_mv_refs(cm, xd, tile, mi, xd->last_mi, ref1, mbmi->ref_mvs[ref1], mi_row, mi_col); if (bsize < BLOCK_8X8 || mbmi->mode != ZEROMV) { - vp9_find_best_ref_mvs(xd, mbmi->ref_mvs[ref1], &nearest[1], &nearmv[1]); + vp9_find_best_ref_mvs(xd, allow_hp, + mbmi->ref_mvs[ref1], &nearest[1], &nearmv[1]); best[1].as_int = nearest[1].as_int; } } - mbmi->interp_filter = cm->mcomp_filter_type == SWITCHABLE - ? read_switchable_filter_type(pbi, r) - : cm->mcomp_filter_type; + mbmi->interp_filter = (cm->mcomp_filter_type == SWITCHABLE) + ? read_switchable_filter_type(cm, xd, r) + : cm->mcomp_filter_type; if (bsize < BLOCK_8X8) { const int num_4x4_w = num_4x4_blocks_wide_lookup[bsize]; // 1 or 2 @@ -550,12 +483,12 @@ static void read_inter_block_mode_info(VP9D_COMP *pbi, MODE_INFO *mi, b_mode = read_inter_mode(cm, r, inter_mode_ctx); if (b_mode == NEARESTMV || b_mode == NEARMV) { - vp9_append_sub8x8_mvs_for_idx(cm, xd, &nearest[0], + vp9_append_sub8x8_mvs_for_idx(cm, xd, tile, &nearest[0], &nearmv[0], j, 0, mi_row, mi_col); if (is_compound) - vp9_append_sub8x8_mvs_for_idx(cm, xd, &nearest[1], + vp9_append_sub8x8_mvs_for_idx(cm, xd, tile, &nearest[1], &nearmv[1], j, 1, mi_row, mi_col); } @@ -589,88 +522,32 @@ static void read_inter_block_mode_info(VP9D_COMP *pbi, MODE_INFO *mi, } } -static void read_inter_frame_mode_info(VP9D_COMP *pbi, MODE_INFO *mi, +static void read_inter_frame_mode_info(VP9_COMMON *const cm, + MACROBLOCKD *const xd, + const TileInfo *const tile, + MODE_INFO *const mi, int mi_row, int mi_col, vp9_reader *r) { - VP9_COMMON *const cm = &pbi->common; MB_MODE_INFO *const mbmi = &mi->mbmi; int inter_block; mbmi->mv[0].as_int = 0; mbmi->mv[1].as_int = 0; - mbmi->segment_id = read_inter_segment_id(pbi, mi_row, mi_col, r); - mbmi->skip_coeff = read_skip_coeff(pbi, mbmi->segment_id, r); - inter_block = read_is_inter_block(pbi, mbmi->segment_id, r); - mbmi->tx_size = read_tx_size(pbi, cm->tx_mode, mbmi->sb_type, + mbmi->segment_id = read_inter_segment_id(cm, xd, mi_row, mi_col, r); + mbmi->skip_coeff = read_skip_coeff(cm, xd, mbmi->segment_id, r); + inter_block = read_is_inter_block(cm, xd, mbmi->segment_id, r); + mbmi->tx_size = read_tx_size(cm, xd, cm->tx_mode, mbmi->sb_type, !mbmi->skip_coeff || !inter_block, r); if (inter_block) - read_inter_block_mode_info(pbi, mi, mi_row, mi_col, r); + read_inter_block_mode_info(cm, xd, tile, mi, mi_row, mi_col, r); else - read_intra_block_mode_info(pbi, mi, r); + read_intra_block_mode_info(cm, mi, r); } -static void read_comp_pred(VP9_COMMON *cm, vp9_reader *r) { - int i; - - cm->comp_pred_mode = cm->allow_comp_inter_inter ? read_comp_pred_mode(r) - : SINGLE_PREDICTION_ONLY; - - if (cm->comp_pred_mode == HYBRID_PREDICTION) - for (i = 0; i < COMP_INTER_CONTEXTS; i++) - vp9_diff_update_prob(r, MODE_UPDATE_PROB, &cm->fc.comp_inter_prob[i]); - - if (cm->comp_pred_mode != COMP_PREDICTION_ONLY) - for (i = 0; i < REF_CONTEXTS; i++) { - vp9_diff_update_prob(r, MODE_UPDATE_PROB, &cm->fc.single_ref_prob[i][0]); - vp9_diff_update_prob(r, MODE_UPDATE_PROB, &cm->fc.single_ref_prob[i][1]); - } - - if (cm->comp_pred_mode != SINGLE_PREDICTION_ONLY) - for (i = 0; i < REF_CONTEXTS; i++) - vp9_diff_update_prob(r, MODE_UPDATE_PROB, &cm->fc.comp_ref_prob[i]); -} - -void vp9_prepare_read_mode_info(VP9D_COMP* pbi, vp9_reader *r) { - VP9_COMMON *const cm = &pbi->common; - int k; - - // TODO(jkoleszar): does this clear more than MBSKIP_CONTEXTS? Maybe remove. - // vpx_memset(cm->fc.mbskip_probs, 0, sizeof(cm->fc.mbskip_probs)); - for (k = 0; k < MBSKIP_CONTEXTS; ++k) - vp9_diff_update_prob(r, MODE_UPDATE_PROB, &cm->fc.mbskip_probs[k]); - - if (cm->frame_type != KEY_FRAME && !cm->intra_only) { - nmv_context *const nmvc = &pbi->common.fc.nmvc; - MACROBLOCKD *const xd = &pbi->mb; - int i, j; - - read_inter_mode_probs(&cm->fc, r); - - if (cm->mcomp_filter_type == SWITCHABLE) - read_switchable_interp_probs(&cm->fc, r); - - for (i = 0; i < INTRA_INTER_CONTEXTS; i++) - vp9_diff_update_prob(r, MODE_UPDATE_PROB, &cm->fc.intra_inter_prob[i]); - - read_comp_pred(cm, r); - - for (j = 0; j < BLOCK_SIZE_GROUPS; j++) - for (i = 0; i < INTRA_MODES - 1; ++i) - vp9_diff_update_prob(r, MODE_UPDATE_PROB, &cm->fc.y_mode_prob[j][i]); - - for (j = 0; j < NUM_PARTITION_CONTEXTS; ++j) - for (i = 0; i < PARTITION_TYPES - 1; ++i) - vp9_diff_update_prob(r, MODE_UPDATE_PROB, - &cm->fc.partition_prob[INTER_FRAME][j][i]); - - read_mv_probs(r, nmvc, xd->allow_high_precision_mv); - } -} - -void vp9_read_mode_info(VP9D_COMP* pbi, int mi_row, int mi_col, vp9_reader *r) { - VP9_COMMON *const cm = &pbi->common; - MACROBLOCKD *const xd = &pbi->mb; - MODE_INFO *mi = xd->this_mi; +void vp9_read_mode_info(VP9_COMMON *cm, MACROBLOCKD *xd, + const TileInfo *const tile, + int mi_row, int mi_col, vp9_reader *r) { + MODE_INFO *const mi = xd->mi_8x8[0]; const BLOCK_SIZE bsize = mi->mbmi.sb_type; const int bw = 1 << mi_width_log2(bsize); const int bh = 1 << mi_height_log2(bsize); @@ -678,13 +555,14 @@ void vp9_read_mode_info(VP9D_COMP* pbi, int mi_row, int mi_col, vp9_reader *r) { const int x_mis = MIN(bw, cm->mi_cols - mi_col); int x, y, z; - if (cm->frame_type == KEY_FRAME || cm->intra_only) - read_intra_frame_mode_info(pbi, mi, mi_row, mi_col, r); + if (frame_is_intra_only(cm)) + read_intra_frame_mode_info(cm, xd, mi, mi_row, mi_col, r); else - read_inter_frame_mode_info(pbi, mi, mi_row, mi_col, r); + read_inter_frame_mode_info(cm, xd, tile, mi, mi_row, mi_col, r); - for (y = 0, z = 0; y < y_mis; y++, z += cm->mode_info_stride) + for (y = 0, z = 0; y < y_mis; y++, z += cm->mode_info_stride) { for (x = !y; x < x_mis; x++) { - xd->mi_8x8[z + x] = mi; - } + xd->mi_8x8[z + x] = mi; + } + } } diff --git a/source/libvpx/vp9/decoder/vp9_decodemv.h b/source/libvpx/vp9/decoder/vp9_decodemv.h index 462d2e3..8e9ae4a 100644 --- a/source/libvpx/vp9/decoder/vp9_decodemv.h +++ b/source/libvpx/vp9/decoder/vp9_decodemv.h @@ -14,8 +14,10 @@ #include "vp9/decoder/vp9_onyxd_int.h" #include "vp9/decoder/vp9_dboolhuff.h" -void vp9_prepare_read_mode_info(VP9D_COMP* pbi, vp9_reader *r); +struct TileInfo; -void vp9_read_mode_info(VP9D_COMP* pbi, int mi_row, int mi_col, vp9_reader *r); +void vp9_read_mode_info(VP9_COMMON *cm, MACROBLOCKD *xd, + const struct TileInfo *const tile, + int mi_row, int mi_col, vp9_reader *r); #endif // VP9_DECODER_VP9_DECODEMV_H_ diff --git a/source/libvpx/vp9/decoder/vp9_decodframe.c b/source/libvpx/vp9/decoder/vp9_decodframe.c index 77b66cd..e3a2b77 100644 --- a/source/libvpx/vp9/decoder/vp9_decodframe.c +++ b/source/libvpx/vp9/decoder/vp9_decodframe.c @@ -19,6 +19,7 @@ #include "vp9/common/vp9_entropy.h" #include "vp9/common/vp9_entropymode.h" #include "vp9/common/vp9_extend.h" +#include "vp9/common/vp9_idct.h" #include "vp9/common/vp9_pred_common.h" #include "vp9/common/vp9_quant_common.h" #include "vp9/common/vp9_reconintra.h" @@ -31,16 +32,48 @@ #include "vp9/decoder/vp9_detokenize.h" #include "vp9/decoder/vp9_decodemv.h" #include "vp9/decoder/vp9_dsubexp.h" -#include "vp9/decoder/vp9_idct_blk.h" #include "vp9/decoder/vp9_onyxd_int.h" #include "vp9/decoder/vp9_read_bit_buffer.h" #include "vp9/decoder/vp9_thread.h" #include "vp9/decoder/vp9_treereader.h" +typedef struct TileWorkerData { + VP9_COMMON *cm; + vp9_reader bit_reader; + DECLARE_ALIGNED(16, MACROBLOCKD, xd); +} TileWorkerData; + static int read_be32(const uint8_t *p) { return (p[0] << 24) | (p[1] << 16) | (p[2] << 8) | p[3]; } +static int is_compound_prediction_allowed(const VP9_COMMON *cm) { + int i; + for (i = 1; i < ALLOWED_REFS_PER_FRAME; ++i) + if (cm->ref_frame_sign_bias[i + 1] != cm->ref_frame_sign_bias[1]) + return 1; + + return 0; +} + +static void setup_compound_prediction(VP9_COMMON *cm) { + if (cm->ref_frame_sign_bias[LAST_FRAME] == + cm->ref_frame_sign_bias[GOLDEN_FRAME]) { + cm->comp_fixed_ref = ALTREF_FRAME; + cm->comp_var_ref[0] = LAST_FRAME; + cm->comp_var_ref[1] = GOLDEN_FRAME; + } else if (cm->ref_frame_sign_bias[LAST_FRAME] == + cm->ref_frame_sign_bias[ALTREF_FRAME]) { + cm->comp_fixed_ref = GOLDEN_FRAME; + cm->comp_var_ref[0] = LAST_FRAME; + cm->comp_var_ref[1] = ALTREF_FRAME; + } else { + cm->comp_fixed_ref = LAST_FRAME; + cm->comp_var_ref[0] = GOLDEN_FRAME; + cm->comp_var_ref[1] = ALTREF_FRAME; + } +} + // len == 0 is not allowed static int read_is_valid(const uint8_t *start, size_t len, const uint8_t *end) { return start + len > start && start + len <= end; @@ -63,15 +96,105 @@ static void read_tx_probs(struct tx_probs *tx_probs, vp9_reader *r) { for (i = 0; i < TX_SIZE_CONTEXTS; ++i) for (j = 0; j < TX_SIZES - 3; ++j) - vp9_diff_update_prob(r, MODE_UPDATE_PROB, &tx_probs->p8x8[i][j]); + vp9_diff_update_prob(r, &tx_probs->p8x8[i][j]); for (i = 0; i < TX_SIZE_CONTEXTS; ++i) for (j = 0; j < TX_SIZES - 2; ++j) - vp9_diff_update_prob(r, MODE_UPDATE_PROB, &tx_probs->p16x16[i][j]); + vp9_diff_update_prob(r, &tx_probs->p16x16[i][j]); for (i = 0; i < TX_SIZE_CONTEXTS; ++i) for (j = 0; j < TX_SIZES - 1; ++j) - vp9_diff_update_prob(r, MODE_UPDATE_PROB, &tx_probs->p32x32[i][j]); + vp9_diff_update_prob(r, &tx_probs->p32x32[i][j]); +} + +static void read_switchable_interp_probs(FRAME_CONTEXT *fc, vp9_reader *r) { + int i, j; + for (j = 0; j < SWITCHABLE_FILTER_CONTEXTS; ++j) + for (i = 0; i < SWITCHABLE_FILTERS - 1; ++i) + vp9_diff_update_prob(r, &fc->switchable_interp_prob[j][i]); +} + +static void read_inter_mode_probs(FRAME_CONTEXT *fc, vp9_reader *r) { + int i, j; + for (i = 0; i < INTER_MODE_CONTEXTS; ++i) + for (j = 0; j < INTER_MODES - 1; ++j) + vp9_diff_update_prob(r, &fc->inter_mode_probs[i][j]); +} + +static INLINE COMPPREDMODE_TYPE read_comp_pred_mode(vp9_reader *r) { + COMPPREDMODE_TYPE mode = vp9_read_bit(r); + if (mode) + mode += vp9_read_bit(r); + return mode; +} + +static void read_comp_pred(VP9_COMMON *cm, vp9_reader *r) { + int i; + + const int compound_allowed = is_compound_prediction_allowed(cm); + cm->comp_pred_mode = compound_allowed ? read_comp_pred_mode(r) + : SINGLE_PREDICTION_ONLY; + if (compound_allowed) + setup_compound_prediction(cm); + + if (cm->comp_pred_mode == HYBRID_PREDICTION) + for (i = 0; i < COMP_INTER_CONTEXTS; i++) + vp9_diff_update_prob(r, &cm->fc.comp_inter_prob[i]); + + if (cm->comp_pred_mode != COMP_PREDICTION_ONLY) + for (i = 0; i < REF_CONTEXTS; i++) { + vp9_diff_update_prob(r, &cm->fc.single_ref_prob[i][0]); + vp9_diff_update_prob(r, &cm->fc.single_ref_prob[i][1]); + } + + if (cm->comp_pred_mode != SINGLE_PREDICTION_ONLY) + for (i = 0; i < REF_CONTEXTS; i++) + vp9_diff_update_prob(r, &cm->fc.comp_ref_prob[i]); +} + +static void update_mv(vp9_reader *r, vp9_prob *p) { + if (vp9_read(r, NMV_UPDATE_PROB)) + *p = (vp9_read_literal(r, 7) << 1) | 1; +} + +static void read_mv_probs(vp9_reader *r, nmv_context *mvc, int allow_hp) { + int i, j, k; + + for (j = 0; j < MV_JOINTS - 1; ++j) + update_mv(r, &mvc->joints[j]); + + for (i = 0; i < 2; ++i) { + nmv_component *const comp = &mvc->comps[i]; + + update_mv(r, &comp->sign); + + for (j = 0; j < MV_CLASSES - 1; ++j) + update_mv(r, &comp->classes[j]); + + for (j = 0; j < CLASS0_SIZE - 1; ++j) + update_mv(r, &comp->class0[j]); + + for (j = 0; j < MV_OFFSET_BITS; ++j) + update_mv(r, &comp->bits[j]); + } + + for (i = 0; i < 2; ++i) { + nmv_component *const comp = &mvc->comps[i]; + + for (j = 0; j < CLASS0_SIZE; ++j) + for (k = 0; k < 3; ++k) + update_mv(r, &comp->class0_fp[j][k]); + + for (j = 0; j < 3; ++j) + update_mv(r, &comp->fp[j]); + } + + if (allow_hp) { + for (i = 0; i < 2; ++i) { + update_mv(r, &mvc->comps[i].class0_hp); + update_mv(r, &mvc->comps[i].hp); + } + } } static void setup_plane_dequants(VP9_COMMON *cm, MACROBLOCKD *xd, int q_index) { @@ -82,47 +205,107 @@ static void setup_plane_dequants(VP9_COMMON *cm, MACROBLOCKD *xd, int q_index) { xd->plane[i].dequant = cm->uv_dequant[q_index]; } -static void decode_block(int plane, int block, BLOCK_SIZE plane_bsize, - TX_SIZE tx_size, void *arg) { - MACROBLOCKD* const xd = arg; +// Allocate storage for each tile column. +// TODO(jzern): when max_threads <= 1 the same storage could be used for each +// tile. +static void alloc_tile_storage(VP9D_COMP *pbi, int tile_cols) { + VP9_COMMON *const cm = &pbi->common; + const int aligned_mi_cols = mi_cols_aligned_to_sb(cm->mi_cols); + int i, tile_col; + + CHECK_MEM_ERROR(cm, pbi->mi_streams, + vpx_realloc(pbi->mi_streams, tile_cols * + sizeof(*pbi->mi_streams))); + for (tile_col = 0; tile_col < tile_cols; ++tile_col) { + TileInfo tile; + + vp9_tile_init(&tile, cm, 0, tile_col); + pbi->mi_streams[tile_col] = + &cm->mi[cm->mi_rows * tile.mi_col_start]; + } + + // 2 contexts per 'mi unit', so that we have one context per 4x4 txfm + // block where mi unit size is 8x8. + CHECK_MEM_ERROR(cm, pbi->above_context[0], + vpx_realloc(pbi->above_context[0], + sizeof(*pbi->above_context[0]) * MAX_MB_PLANE * + 2 * aligned_mi_cols)); + for (i = 1; i < MAX_MB_PLANE; ++i) { + pbi->above_context[i] = pbi->above_context[0] + + i * sizeof(*pbi->above_context[0]) * + 2 * aligned_mi_cols; + } + + // This is sized based on the entire frame. Each tile operates within its + // column bounds. + CHECK_MEM_ERROR(cm, pbi->above_seg_context, + vpx_realloc(pbi->above_seg_context, + sizeof(*pbi->above_seg_context) * + aligned_mi_cols)); +} + +static void inverse_transform_block(MACROBLOCKD* xd, int plane, int block, + BLOCK_SIZE plane_bsize, TX_SIZE tx_size) { struct macroblockd_plane *const pd = &xd->plane[plane]; int16_t* const qcoeff = BLOCK_OFFSET(pd->qcoeff, block); const int stride = pd->dst.stride; const int eob = pd->eobs[block]; - const int raster_block = txfrm_block_to_raster_block(plane_bsize, tx_size, - block); - uint8_t* const dst = raster_block_offset_uint8(plane_bsize, raster_block, - pd->dst.buf, stride); - switch (tx_size) { - case TX_4X4: { - const TX_TYPE tx_type = get_tx_type_4x4(pd->plane_type, xd, raster_block); - if (tx_type == DCT_DCT) - xd->itxm_add(qcoeff, dst, stride, eob); + if (eob > 0) { + TX_TYPE tx_type; + const int raster_block = txfrm_block_to_raster_block(plane_bsize, tx_size, + block); + uint8_t* const dst = raster_block_offset_uint8(plane_bsize, raster_block, + pd->dst.buf, stride); + switch (tx_size) { + case TX_4X4: + tx_type = get_tx_type_4x4(pd->plane_type, xd, raster_block); + if (tx_type == DCT_DCT) + xd->itxm_add(qcoeff, dst, stride, eob); + else + vp9_iht4x4_add(tx_type, qcoeff, dst, stride, eob); + break; + case TX_8X8: + tx_type = get_tx_type_8x8(pd->plane_type, xd); + vp9_iht8x8_add(tx_type, qcoeff, dst, stride, eob); + break; + case TX_16X16: + tx_type = get_tx_type_16x16(pd->plane_type, xd); + vp9_iht16x16_add(tx_type, qcoeff, dst, stride, eob); + break; + case TX_32X32: + tx_type = DCT_DCT; + vp9_idct32x32_add(qcoeff, dst, stride, eob); + break; + default: + assert(!"Invalid transform size"); + } + + if (eob == 1) { + vpx_memset(qcoeff, 0, 2 * sizeof(qcoeff[0])); + } else { + if (tx_type == DCT_DCT && tx_size <= TX_16X16 && eob <= 10) + vpx_memset(qcoeff, 0, 4 * (4 << tx_size) * sizeof(qcoeff[0])); else - vp9_iht_add_c(tx_type, qcoeff, dst, stride, eob); - break; + vpx_memset(qcoeff, 0, (16 << (tx_size << 1)) * sizeof(qcoeff[0])); } - case TX_8X8: - vp9_iht_add_8x8_c(get_tx_type_8x8(pd->plane_type, xd), qcoeff, dst, - stride, eob); - break; - case TX_16X16: - vp9_iht_add_16x16_c(get_tx_type_16x16(pd->plane_type, xd), qcoeff, dst, - stride, eob); - break; - case TX_32X32: - vp9_idct_add_32x32(qcoeff, dst, stride, eob); - break; - default: - assert(!"Invalid transform size"); } } -static void decode_block_intra(int plane, int block, BLOCK_SIZE plane_bsize, - TX_SIZE tx_size, void *arg) { - MACROBLOCKD* const xd = arg; +struct intra_args { + VP9_COMMON *cm; + MACROBLOCKD *xd; + vp9_reader *r; +}; + +static void predict_and_reconstruct_intra_block(int plane, int block, + BLOCK_SIZE plane_bsize, + TX_SIZE tx_size, void *arg) { + struct intra_args *const args = arg; + VP9_COMMON *const cm = args->cm; + MACROBLOCKD *const xd = args->xd; + struct macroblockd_plane *const pd = &xd->plane[plane]; - MODE_INFO *const mi = xd->this_mi; + MODE_INFO *const mi = xd->mi_8x8[0]; const int raster_block = txfrm_block_to_raster_block(plane_bsize, tx_size, block); uint8_t* const dst = raster_block_offset_uint8(plane_bsize, raster_block, @@ -139,32 +322,35 @@ static void decode_block_intra(int plane, int block, BLOCK_SIZE plane_bsize, b_width_log2(plane_bsize), tx_size, mode, dst, pd->dst.stride, dst, pd->dst.stride); - if (!mi->mbmi.skip_coeff) - decode_block(plane, block, plane_bsize, tx_size, arg); + if (!mi->mbmi.skip_coeff) { + vp9_decode_block_tokens(cm, xd, plane, block, plane_bsize, tx_size, + args->r); + inverse_transform_block(xd, plane, block, plane_bsize, tx_size); + } } -static int decode_tokens(VP9D_COMP *pbi, BLOCK_SIZE bsize, vp9_reader *r) { - VP9_COMMON *const cm = &pbi->common; - MACROBLOCKD *const xd = &pbi->mb; - MB_MODE_INFO *const mbmi = &xd->this_mi->mbmi; - - if (mbmi->skip_coeff) { - reset_skip_context(xd, bsize); - return -1; - } else { - if (cm->seg.enabled) - setup_plane_dequants(cm, xd, vp9_get_qindex(&cm->seg, mbmi->segment_id, - cm->base_qindex)); - - // TODO(dkovalev) if (!vp9_reader_has_error(r)) - return vp9_decode_tokens(pbi, r, bsize); - } +struct inter_args { + VP9_COMMON *cm; + MACROBLOCKD *xd; + vp9_reader *r; + int *eobtotal; +}; + +static void reconstruct_inter_block(int plane, int block, + BLOCK_SIZE plane_bsize, + TX_SIZE tx_size, void *arg) { + struct inter_args *args = arg; + VP9_COMMON *const cm = args->cm; + MACROBLOCKD *const xd = args->xd; + + *args->eobtotal += vp9_decode_block_tokens(cm, xd, plane, block, + plane_bsize, tx_size, args->r); + inverse_transform_block(xd, plane, block, plane_bsize, tx_size); } -static void set_offsets(VP9D_COMP *pbi, BLOCK_SIZE bsize, - int mi_row, int mi_col) { - VP9_COMMON *const cm = &pbi->common; - MACROBLOCKD *const xd = &pbi->mb; +static void set_offsets(VP9_COMMON *const cm, MACROBLOCKD *const xd, + const TileInfo *const tile, + BLOCK_SIZE bsize, int mi_row, int mi_col) { const int bh = num_8x8_blocks_high_lookup[bsize]; const int bw = num_8x8_blocks_wide_lookup[bsize]; const int offset = mi_row * cm->mode_info_stride + mi_col; @@ -175,178 +361,178 @@ static void set_offsets(VP9D_COMP *pbi, BLOCK_SIZE bsize, xd->prev_mi_8x8 = cm->prev_mi_grid_visible + offset; // we are using the mode info context stream here - xd->this_mi = - xd->mi_8x8[0] = xd->mic_stream_ptr; - xd->this_mi->mbmi.sb_type = bsize; - xd->mic_stream_ptr++; + xd->mi_8x8[0] = xd->mi_stream; + xd->mi_8x8[0]->mbmi.sb_type = bsize; + ++xd->mi_stream; // Special case: if prev_mi is NULL, the previous mode info context // cannot be used. xd->last_mi = cm->prev_mi ? xd->prev_mi_8x8[0] : NULL; - set_skip_context(cm, xd, mi_row, mi_col); - set_partition_seg_context(cm, xd, mi_row, mi_col); + set_skip_context(xd, xd->above_context, xd->left_context, mi_row, mi_col); // Distance of Mb to the various image edges. These are specified to 8th pel // as they are always compared to values that are in 1/8th pel units - set_mi_row_col(cm, xd, mi_row, bh, mi_col, bw); + set_mi_row_col(xd, tile, mi_row, bh, mi_col, bw, cm->mi_rows, cm->mi_cols); - setup_dst_planes(xd, &cm->yv12_fb[cm->new_fb_idx], mi_row, mi_col); + setup_dst_planes(xd, get_frame_new_buffer(cm), mi_row, mi_col); } -static void set_ref(VP9D_COMP *pbi, int i, int mi_row, int mi_col) { - VP9_COMMON *const cm = &pbi->common; - MACROBLOCKD *const xd = &pbi->mb; - MB_MODE_INFO *const mbmi = &xd->this_mi->mbmi; - const int ref = mbmi->ref_frame[i] - LAST_FRAME; - const YV12_BUFFER_CONFIG *cfg = &cm->yv12_fb[cm->active_ref_idx[ref]]; - const struct scale_factors *sf = &cm->active_ref_scale[ref]; - if (!vp9_is_valid_scale(sf)) +static void set_ref(VP9_COMMON *const cm, MACROBLOCKD *const xd, + int idx, int mi_row, int mi_col) { + MB_MODE_INFO *const mbmi = &xd->mi_8x8[0]->mbmi; + const int ref = mbmi->ref_frame[idx] - LAST_FRAME; + const YV12_BUFFER_CONFIG *cfg = get_frame_ref_buffer(cm, ref); + const struct scale_factors_common *sfc = &cm->active_ref_scale_comm[ref]; + if (!vp9_is_valid_scale(sfc)) vpx_internal_error(&cm->error, VPX_CODEC_UNSUP_BITSTREAM, "Invalid scale factors"); - xd->scale_factor[i] = *sf; - setup_pre_planes(xd, i, cfg, mi_row, mi_col, sf); + xd->scale_factor[idx].sfc = sfc; + setup_pre_planes(xd, idx, cfg, mi_row, mi_col, &xd->scale_factor[idx]); xd->corrupted |= cfg->corrupted; } -static void decode_modes_b(VP9D_COMP *pbi, int mi_row, int mi_col, +static void decode_modes_b(VP9_COMMON *const cm, MACROBLOCKD *const xd, + const TileInfo *const tile, + int mi_row, int mi_col, vp9_reader *r, BLOCK_SIZE bsize) { - VP9_COMMON *const cm = &pbi->common; - MACROBLOCKD *const xd = &pbi->mb; const int less8x8 = bsize < BLOCK_8X8; MB_MODE_INFO *mbmi; - if (less8x8) - if (xd->ab_index > 0) - return; - - set_offsets(pbi, bsize, mi_row, mi_col); - vp9_read_mode_info(pbi, mi_row, mi_col, r); + set_offsets(cm, xd, tile, bsize, mi_row, mi_col); + vp9_read_mode_info(cm, xd, tile, mi_row, mi_col, r); if (less8x8) bsize = BLOCK_8X8; // Has to be called after set_offsets - mbmi = &xd->this_mi->mbmi; + mbmi = &xd->mi_8x8[0]->mbmi; - if (!is_inter_block(mbmi)) { - // Intra reconstruction - decode_tokens(pbi, bsize, r); - foreach_transformed_block(xd, bsize, decode_block_intra, xd); + if (mbmi->skip_coeff) { + reset_skip_context(xd, bsize); } else { - // Inter reconstruction - int eobtotal; + if (cm->seg.enabled) + setup_plane_dequants(cm, xd, vp9_get_qindex(&cm->seg, mbmi->segment_id, + cm->base_qindex)); + } - set_ref(pbi, 0, mi_row, mi_col); + if (!is_inter_block(mbmi)) { + struct intra_args arg = { cm, xd, r }; + foreach_transformed_block(xd, bsize, predict_and_reconstruct_intra_block, + &arg); + } else { + // Setup + set_ref(cm, xd, 0, mi_row, mi_col); if (has_second_ref(mbmi)) - set_ref(pbi, 1, mi_row, mi_col); + set_ref(cm, xd, 1, mi_row, mi_col); + + xd->subpix.filter_x = xd->subpix.filter_y = + vp9_get_filter_kernel(mbmi->interp_filter); - vp9_setup_interp_filters(xd, mbmi->interp_filter, cm); + // Prediction vp9_build_inter_predictors_sb(xd, mi_row, mi_col, bsize); - eobtotal = decode_tokens(pbi, bsize, r); - if (less8x8) { - if (eobtotal >= 0) - foreach_transformed_block(xd, bsize, decode_block, xd); - } else { - assert(mbmi->sb_type == bsize); - if (eobtotal == 0) - // skip loopfilter - vp9_set_pred_flag_mbskip(xd, bsize, 1); - else if (eobtotal > 0) - foreach_transformed_block(xd, bsize, decode_block, xd); + + // Reconstruction + if (!mbmi->skip_coeff) { + int eobtotal = 0; + struct inter_args arg = { cm, xd, r, &eobtotal }; + foreach_transformed_block(xd, bsize, reconstruct_inter_block, &arg); + if (!less8x8 && eobtotal == 0) + mbmi->skip_coeff = 1; // skip loopfilter } } + xd->corrupted |= vp9_reader_has_error(r); } -static void decode_modes_sb(VP9D_COMP *pbi, int mi_row, int mi_col, +static PARTITION_TYPE read_partition(int hbs, int mi_rows, int mi_cols, + int mi_row, int mi_col, + vp9_prob probs[PARTITION_TYPES - 1], + vp9_reader *r) { + const int has_rows = (mi_row + hbs) < mi_rows; + const int has_cols = (mi_col + hbs) < mi_cols; + + if (has_rows && has_cols) + return treed_read(r, vp9_partition_tree, probs); + else if (!has_rows && has_cols) + return vp9_read(r, probs[1]) ? PARTITION_SPLIT : PARTITION_HORZ; + else if (has_rows && !has_cols) + return vp9_read(r, probs[2]) ? PARTITION_SPLIT : PARTITION_VERT; + else + return PARTITION_SPLIT; +} + +static void decode_modes_sb(VP9_COMMON *const cm, MACROBLOCKD *const xd, + const TileInfo *const tile, + int mi_row, int mi_col, vp9_reader* r, BLOCK_SIZE bsize) { - VP9_COMMON *const cm = &pbi->common; - MACROBLOCKD *const xd = &pbi->mb; const int hbs = num_8x8_blocks_wide_lookup[bsize] / 2; - PARTITION_TYPE partition = PARTITION_NONE; + PARTITION_TYPE partition; BLOCK_SIZE subsize; + int ctx; if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols) return; - if (bsize < BLOCK_8X8) { - if (xd->ab_index != 0) - return; - } else { - int pl; - const int idx = check_bsize_coverage(hbs, cm->mi_rows, cm->mi_cols, - mi_row, mi_col); - set_partition_seg_context(cm, xd, mi_row, mi_col); - pl = partition_plane_context(xd, bsize); - - if (idx == 0) - partition = treed_read(r, vp9_partition_tree, - cm->fc.partition_prob[cm->frame_type][pl]); - else if (idx > 0 && - !vp9_read(r, cm->fc.partition_prob[cm->frame_type][pl][idx])) - partition = (idx == 1) ? PARTITION_HORZ : PARTITION_VERT; - else - partition = PARTITION_SPLIT; + ctx = partition_plane_context(xd->above_seg_context, xd->left_seg_context, + mi_row, mi_col, bsize); + partition = read_partition(hbs, cm->mi_rows, cm->mi_cols, mi_row, mi_col, + cm->fc.partition_prob[cm->frame_type][ctx], r); - cm->counts.partition[pl][partition]++; - } + if (!cm->frame_parallel_decoding_mode) + ++cm->counts.partition[ctx][partition]; subsize = get_subsize(bsize, partition); - *get_sb_index(xd, subsize) = 0; - - switch (partition) { - case PARTITION_NONE: - decode_modes_b(pbi, mi_row, mi_col, r, subsize); - break; - case PARTITION_HORZ: - decode_modes_b(pbi, mi_row, mi_col, r, subsize); - *get_sb_index(xd, subsize) = 1; - if (mi_row + hbs < cm->mi_rows) - decode_modes_b(pbi, mi_row + hbs, mi_col, r, subsize); - break; - case PARTITION_VERT: - decode_modes_b(pbi, mi_row, mi_col, r, subsize); - *get_sb_index(xd, subsize) = 1; - if (mi_col + hbs < cm->mi_cols) - decode_modes_b(pbi, mi_row, mi_col + hbs, r, subsize); - break; - case PARTITION_SPLIT: { - int n; - for (n = 0; n < 4; n++) { - const int j = n >> 1, i = n & 1; - *get_sb_index(xd, subsize) = n; - decode_modes_sb(pbi, mi_row + j * hbs, mi_col + i * hbs, r, subsize); - } - } break; - default: - assert(!"Invalid partition type"); + if (subsize < BLOCK_8X8) { + decode_modes_b(cm, xd, tile, mi_row, mi_col, r, subsize); + } else { + switch (partition) { + case PARTITION_NONE: + decode_modes_b(cm, xd, tile, mi_row, mi_col, r, subsize); + break; + case PARTITION_HORZ: + decode_modes_b(cm, xd, tile, mi_row, mi_col, r, subsize); + if (mi_row + hbs < cm->mi_rows) + decode_modes_b(cm, xd, tile, mi_row + hbs, mi_col, r, subsize); + break; + case PARTITION_VERT: + decode_modes_b(cm, xd, tile, mi_row, mi_col, r, subsize); + if (mi_col + hbs < cm->mi_cols) + decode_modes_b(cm, xd, tile, mi_row, mi_col + hbs, r, subsize); + break; + case PARTITION_SPLIT: + decode_modes_sb(cm, xd, tile, mi_row, mi_col, r, subsize); + decode_modes_sb(cm, xd, tile, mi_row, mi_col + hbs, r, subsize); + decode_modes_sb(cm, xd, tile, mi_row + hbs, mi_col, r, subsize); + decode_modes_sb(cm, xd, tile, mi_row + hbs, mi_col + hbs, r, subsize); + break; + default: + assert(!"Invalid partition type"); + } } // update partition context if (bsize >= BLOCK_8X8 && - (bsize == BLOCK_8X8 || partition != PARTITION_SPLIT)) { - set_partition_seg_context(cm, xd, mi_row, mi_col); - update_partition_context(xd, subsize, bsize); - } + (bsize == BLOCK_8X8 || partition != PARTITION_SPLIT)) + update_partition_context(xd->above_seg_context, xd->left_seg_context, + mi_row, mi_col, subsize, bsize); } -static void setup_token_decoder(VP9D_COMP *pbi, - const uint8_t *data, size_t read_size, +static void setup_token_decoder(const uint8_t *data, + const uint8_t *data_end, + size_t read_size, + struct vpx_internal_error_info *error_info, vp9_reader *r) { - VP9_COMMON *cm = &pbi->common; - const uint8_t *data_end = pbi->source + pbi->source_sz; - // Validate the calculated partition length. If the buffer // described by the partition can't be fully read, then restrict // it to the portion that can be (for EC mode) or throw an error. if (!read_is_valid(data, read_size, data_end)) - vpx_internal_error(&cm->error, VPX_CODEC_CORRUPT_FRAME, + vpx_internal_error(error_info, VPX_CODEC_CORRUPT_FRAME, "Truncated packet or corrupt tile length"); if (vp9_reader_init(r, data, read_size)) - vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR, + vpx_internal_error(error_info, VPX_CODEC_MEM_ERROR, "Failed to allocate bool decoder %d", 1); } @@ -361,8 +547,7 @@ static void read_coef_probs_common(vp9_coeff_probs_model *coef_probs, for (l = 0; l < PREV_COEF_CONTEXTS; l++) if (k > 0 || l < 3) for (m = 0; m < UNCONSTRAINED_NODES; m++) - vp9_diff_update_prob(r, VP9_COEF_UPDATE_PROB, - &coef_probs[i][j][k][l][m]); + vp9_diff_update_prob(r, &coef_probs[i][j][k][l][m]); } static void read_coef_probs(FRAME_CONTEXT *fc, TX_MODE tx_mode, @@ -433,7 +618,6 @@ static void setup_segmentation(struct segmentation *seg, static void setup_loopfilter(struct loopfilter *lf, struct vp9_read_bit_buffer *rb) { - lf->filter_level = vp9_rb_read_literal(rb, 6); lf->sharpness_level = vp9_rb_read_literal(rb, 3); @@ -464,9 +648,8 @@ static int read_delta_q(struct vp9_read_bit_buffer *rb, int *delta_q) { return old != *delta_q; } -static void setup_quantization(VP9D_COMP *pbi, struct vp9_read_bit_buffer *rb) { - MACROBLOCKD *const xd = &pbi->mb; - VP9_COMMON *const cm = &pbi->common; +static void setup_quantization(VP9_COMMON *const cm, MACROBLOCKD *const xd, + struct vp9_read_bit_buffer *rb) { int update = 0; cm->base_qindex = vp9_rb_read_literal(rb, QINDEX_BITS); @@ -481,16 +664,15 @@ static void setup_quantization(VP9D_COMP *pbi, struct vp9_read_bit_buffer *rb) { cm->uv_dc_delta_q == 0 && cm->uv_ac_delta_q == 0; - xd->itxm_add = xd->lossless ? vp9_idct_add_lossless_c - : vp9_idct_add; + xd->itxm_add = xd->lossless ? vp9_iwht4x4_add : vp9_idct4x4_add; } -static INTERPOLATIONFILTERTYPE read_interp_filter_type( - struct vp9_read_bit_buffer *rb) { - const INTERPOLATIONFILTERTYPE literal_to_type[] = { EIGHTTAP_SMOOTH, - EIGHTTAP, - EIGHTTAP_SHARP, - BILINEAR }; +static INTERPOLATION_TYPE read_interp_filter_type( + struct vp9_read_bit_buffer *rb) { + const INTERPOLATION_TYPE literal_to_type[] = { EIGHTTAP_SMOOTH, + EIGHTTAP, + EIGHTTAP_SHARP, + BILINEAR }; return vp9_rb_read_bit(rb) ? SWITCHABLE : literal_to_type[vp9_rb_read_literal(rb, 2)]; } @@ -536,7 +718,7 @@ static void apply_frame_size(VP9D_COMP *pbi, int width, int height) { vp9_update_frame_size(cm); } - vp9_realloc_frame_buffer(&cm->yv12_fb[cm->new_fb_idx], cm->width, cm->height, + vp9_realloc_frame_buffer(get_frame_new_buffer(cm), cm->width, cm->height, cm->subsampling_x, cm->subsampling_y, VP9BORDERINPIXELS); } @@ -557,7 +739,7 @@ static void setup_frame_size_with_refs(VP9D_COMP *pbi, int found = 0, i; for (i = 0; i < ALLOWED_REFS_PER_FRAME; ++i) { if (vp9_rb_read_bit(rb)) { - YV12_BUFFER_CONFIG *cfg = &cm->yv12_fb[cm->active_ref_idx[i]]; + YV12_BUFFER_CONFIG *const cfg = get_frame_ref_buffer(cm, i); width = cfg->y_crop_width; height = cfg->y_crop_height; found = 1; @@ -576,67 +758,72 @@ static void setup_frame_size_with_refs(VP9D_COMP *pbi, setup_display_size(cm, rb); } -static void decode_tile(VP9D_COMP *pbi, vp9_reader *r) { +static void setup_tile_context(VP9D_COMP *const pbi, MACROBLOCKD *const xd, + int tile_col) { + int i; + xd->mi_stream = pbi->mi_streams[tile_col]; + + for (i = 0; i < MAX_MB_PLANE; ++i) { + xd->above_context[i] = pbi->above_context[i]; + } + // see note in alloc_tile_storage(). + xd->above_seg_context = pbi->above_seg_context; +} + +static void decode_tile(VP9D_COMP *pbi, const TileInfo *const tile, + vp9_reader *r) { const int num_threads = pbi->oxcf.max_threads; VP9_COMMON *const cm = &pbi->common; int mi_row, mi_col; - YV12_BUFFER_CONFIG *const fb = &cm->yv12_fb[cm->new_fb_idx]; + MACROBLOCKD *xd = &pbi->mb; if (pbi->do_loopfilter_inline) { - if (num_threads > 1) { - LFWorkerData *const lf_data = (LFWorkerData*)pbi->lf_worker.data1; - lf_data->frame_buffer = fb; - lf_data->cm = cm; - lf_data->xd = pbi->mb; - lf_data->stop = 0; - lf_data->y_only = 0; - } + LFWorkerData *const lf_data = (LFWorkerData*)pbi->lf_worker.data1; + lf_data->frame_buffer = get_frame_new_buffer(cm); + lf_data->cm = cm; + lf_data->xd = pbi->mb; + lf_data->stop = 0; + lf_data->y_only = 0; vp9_loop_filter_frame_init(cm, cm->lf.filter_level); } - for (mi_row = cm->cur_tile_mi_row_start; mi_row < cm->cur_tile_mi_row_end; + for (mi_row = tile->mi_row_start; mi_row < tile->mi_row_end; mi_row += MI_BLOCK_SIZE) { // For a SB there are 2 left contexts, each pertaining to a MB row within - vp9_zero(cm->left_context); - vp9_zero(cm->left_seg_context); - for (mi_col = cm->cur_tile_mi_col_start; mi_col < cm->cur_tile_mi_col_end; + vp9_zero(xd->left_context); + vp9_zero(xd->left_seg_context); + for (mi_col = tile->mi_col_start; mi_col < tile->mi_col_end; mi_col += MI_BLOCK_SIZE) - decode_modes_sb(pbi, mi_row, mi_col, r, BLOCK_64X64); + decode_modes_sb(cm, xd, tile, mi_row, mi_col, r, BLOCK_64X64); if (pbi->do_loopfilter_inline) { - // delay the loopfilter by 1 macroblock row. const int lf_start = mi_row - MI_BLOCK_SIZE; - if (lf_start < 0) continue; + LFWorkerData *const lf_data = (LFWorkerData*)pbi->lf_worker.data1; - if (num_threads > 1) { - LFWorkerData *const lf_data = (LFWorkerData*)pbi->lf_worker.data1; + // delay the loopfilter by 1 macroblock row. + if (lf_start < 0) continue; - // decoding has completed: finish up the loop filter in this thread. - if (mi_row + MI_BLOCK_SIZE >= cm->cur_tile_mi_row_end) continue; + // decoding has completed: finish up the loop filter in this thread. + if (mi_row + MI_BLOCK_SIZE >= tile->mi_row_end) continue; - vp9_worker_sync(&pbi->lf_worker); - lf_data->start = lf_start; - lf_data->stop = mi_row; - pbi->lf_worker.hook = vp9_loop_filter_worker; + vp9_worker_sync(&pbi->lf_worker); + lf_data->start = lf_start; + lf_data->stop = mi_row; + if (num_threads > 1) { vp9_worker_launch(&pbi->lf_worker); } else { - vp9_loop_filter_rows(fb, cm, &pbi->mb, lf_start, mi_row, 0); + vp9_worker_execute(&pbi->lf_worker); } } } if (pbi->do_loopfilter_inline) { - int lf_start; - if (num_threads > 1) { - LFWorkerData *const lf_data = (LFWorkerData*)pbi->lf_worker.data1; + LFWorkerData *const lf_data = (LFWorkerData*)pbi->lf_worker.data1; - vp9_worker_sync(&pbi->lf_worker); - lf_start = lf_data->stop; - } else { - lf_start = mi_row - MI_BLOCK_SIZE; - } - vp9_loop_filter_rows(fb, cm, &pbi->mb, - lf_start, cm->mi_rows, 0); + vp9_worker_sync(&pbi->lf_worker); + lf_data->start = lf_data->stop; + lf_data->stop = cm->mi_rows; + vp9_worker_execute(&pbi->lf_worker); } } @@ -656,10 +843,32 @@ static void setup_tile_info(VP9_COMMON *cm, struct vp9_read_bit_buffer *rb) { cm->log2_tile_rows += vp9_rb_read_bit(rb); } +// Reads the next tile returning its size and adjusting '*data' accordingly +// based on 'is_last'. +static size_t get_tile(const uint8_t *const data_end, + int is_last, + struct vpx_internal_error_info *error_info, + const uint8_t **data) { + size_t size; + + if (!is_last) { + if (!read_is_valid(*data, 4, data_end)) + vpx_internal_error(error_info, VPX_CODEC_CORRUPT_FRAME, + "Truncated packet or corrupt tile length"); + + size = read_be32(*data); + *data += 4; + } else { + size = data_end - *data; + } + return size; +} + static const uint8_t *decode_tiles(VP9D_COMP *pbi, const uint8_t *data) { vp9_reader residual_bc; VP9_COMMON *const cm = &pbi->common; + MACROBLOCKD *const xd = &pbi->mb; const uint8_t *const data_end = pbi->source + pbi->source_sz; const int aligned_mi_cols = mi_cols_aligned_to_sb(cm->mi_cols); @@ -669,70 +878,57 @@ static const uint8_t *decode_tiles(VP9D_COMP *pbi, const uint8_t *data) { // Note: this memset assumes above_context[0], [1] and [2] // are allocated as part of the same buffer. - vpx_memset(cm->above_context[0], 0, - sizeof(ENTROPY_CONTEXT) * MAX_MB_PLANE * (2 * aligned_mi_cols)); + vpx_memset(pbi->above_context[0], 0, + sizeof(*pbi->above_context[0]) * MAX_MB_PLANE * + 2 * aligned_mi_cols); - vpx_memset(cm->above_seg_context, 0, - sizeof(PARTITION_CONTEXT) * aligned_mi_cols); + vpx_memset(pbi->above_seg_context, 0, + sizeof(*pbi->above_seg_context) * aligned_mi_cols); if (pbi->oxcf.inv_tile_order) { const uint8_t *data_ptr2[4][1 << 6]; vp9_reader bc_bak = {0}; - // pre-initialize the offsets, we're going to read in inverse order + // pre-initialize the offsets, we're going to decode in inverse order data_ptr2[0][0] = data; for (tile_row = 0; tile_row < tile_rows; tile_row++) { - if (tile_row) { - const int size = read_be32(data_ptr2[tile_row - 1][tile_cols - 1]); - data_ptr2[tile_row - 1][tile_cols - 1] += 4; - data_ptr2[tile_row][0] = data_ptr2[tile_row - 1][tile_cols - 1] + size; - } - - for (tile_col = 1; tile_col < tile_cols; tile_col++) { - const int size = read_be32(data_ptr2[tile_row][tile_col - 1]); - data_ptr2[tile_row][tile_col - 1] += 4; - data_ptr2[tile_row][tile_col] = - data_ptr2[tile_row][tile_col - 1] + size; + for (tile_col = 0; tile_col < tile_cols; tile_col++) { + const int last_tile = + tile_row == tile_rows - 1 && tile_col == tile_cols - 1; + const size_t size = get_tile(data_end, last_tile, &cm->error, &data); + data_ptr2[tile_row][tile_col] = data; + data += size; } } for (tile_row = 0; tile_row < tile_rows; tile_row++) { - vp9_get_tile_row_offsets(cm, tile_row); for (tile_col = tile_cols - 1; tile_col >= 0; tile_col--) { - vp9_get_tile_col_offsets(cm, tile_col); - setup_token_decoder(pbi, data_ptr2[tile_row][tile_col], + TileInfo tile; + + vp9_tile_init(&tile, cm, tile_row, tile_col); + setup_token_decoder(data_ptr2[tile_row][tile_col], data_end, data_end - data_ptr2[tile_row][tile_col], - &residual_bc); - decode_tile(pbi, &residual_bc); + &cm->error, &residual_bc); + setup_tile_context(pbi, xd, tile_col); + decode_tile(pbi, &tile, &residual_bc); if (tile_row == tile_rows - 1 && tile_col == tile_cols - 1) bc_bak = residual_bc; } } residual_bc = bc_bak; } else { - int has_more; - for (tile_row = 0; tile_row < tile_rows; tile_row++) { - vp9_get_tile_row_offsets(cm, tile_row); for (tile_col = 0; tile_col < tile_cols; tile_col++) { - size_t size; - - vp9_get_tile_col_offsets(cm, tile_col); + const int last_tile = + tile_row == tile_rows - 1 && tile_col == tile_cols - 1; + const size_t size = get_tile(data_end, last_tile, &cm->error, &data); + TileInfo tile; - has_more = tile_col < tile_cols - 1 || tile_row < tile_rows - 1; - if (has_more) { - if (!read_is_valid(data, 4, data_end)) - vpx_internal_error(&cm->error, VPX_CODEC_CORRUPT_FRAME, - "Truncated packet or corrupt tile length"); + vp9_tile_init(&tile, cm, tile_row, tile_col); - size = read_be32(data); - data += 4; - } else { - size = data_end - data; - } - - setup_token_decoder(pbi, data, size, &residual_bc); - decode_tile(pbi, &residual_bc); + setup_token_decoder(data, data_end, size, &cm->error, &residual_bc); + setup_tile_context(pbi, xd, tile_col); + decode_tile(pbi, &tile, &residual_bc); data += size; } } @@ -741,10 +937,111 @@ static const uint8_t *decode_tiles(VP9D_COMP *pbi, const uint8_t *data) { return vp9_reader_find_end(&residual_bc); } +static int tile_worker_hook(void *arg1, void *arg2) { + TileWorkerData *const tile_data = (TileWorkerData*)arg1; + const TileInfo *const tile = (TileInfo*)arg2; + int mi_row, mi_col; + + for (mi_row = tile->mi_row_start; mi_row < tile->mi_row_end; + mi_row += MI_BLOCK_SIZE) { + vp9_zero(tile_data->xd.left_context); + vp9_zero(tile_data->xd.left_seg_context); + for (mi_col = tile->mi_col_start; mi_col < tile->mi_col_end; + mi_col += MI_BLOCK_SIZE) + decode_modes_sb(tile_data->cm, &tile_data->xd, tile, + mi_row, mi_col, &tile_data->bit_reader, BLOCK_64X64); + } + return !tile_data->xd.corrupted; +} + +static const uint8_t *decode_tiles_mt(VP9D_COMP *pbi, const uint8_t *data) { + VP9_COMMON *const cm = &pbi->common; + const uint8_t *const data_end = pbi->source + pbi->source_sz; + const int aligned_mi_cols = mi_cols_aligned_to_sb(cm->mi_cols); + const int tile_cols = 1 << cm->log2_tile_cols; + const int tile_rows = 1 << cm->log2_tile_rows; + const int num_workers = MIN(pbi->oxcf.max_threads & ~1, tile_cols); + int tile_col = 0; + + assert(tile_rows == 1); + (void)tile_rows; + + if (num_workers > pbi->num_tile_workers) { + int i; + CHECK_MEM_ERROR(cm, pbi->tile_workers, + vpx_realloc(pbi->tile_workers, + num_workers * sizeof(*pbi->tile_workers))); + for (i = pbi->num_tile_workers; i < num_workers; ++i) { + VP9Worker *const worker = &pbi->tile_workers[i]; + ++pbi->num_tile_workers; + + vp9_worker_init(worker); + worker->hook = (VP9WorkerHook)tile_worker_hook; + CHECK_MEM_ERROR(cm, worker->data1, + vpx_memalign(32, sizeof(TileWorkerData))); + CHECK_MEM_ERROR(cm, worker->data2, vpx_malloc(sizeof(TileInfo))); + if (i < num_workers - 1 && !vp9_worker_reset(worker)) { + vpx_internal_error(&cm->error, VPX_CODEC_ERROR, + "Tile decoder thread creation failed"); + } + } + } + + // Note: this memset assumes above_context[0], [1] and [2] + // are allocated as part of the same buffer. + vpx_memset(pbi->above_context[0], 0, + sizeof(*pbi->above_context[0]) * MAX_MB_PLANE * + 2 * aligned_mi_cols); + vpx_memset(pbi->above_seg_context, 0, + sizeof(*pbi->above_seg_context) * aligned_mi_cols); + + while (tile_col < tile_cols) { + int i; + for (i = 0; i < num_workers && tile_col < tile_cols; ++i) { + VP9Worker *const worker = &pbi->tile_workers[i]; + TileWorkerData *const tile_data = (TileWorkerData*)worker->data1; + TileInfo *const tile = (TileInfo*)worker->data2; + const size_t size = + get_tile(data_end, tile_col == tile_cols - 1, &cm->error, &data); + + tile_data->cm = cm; + tile_data->xd = pbi->mb; + tile_data->xd.corrupted = 0; + vp9_tile_init(tile, tile_data->cm, 0, tile_col); + + setup_token_decoder(data, data_end, size, &cm->error, + &tile_data->bit_reader); + setup_tile_context(pbi, &tile_data->xd, tile_col); + + worker->had_error = 0; + if (i == num_workers - 1 || tile_col == tile_cols - 1) { + vp9_worker_execute(worker); + } else { + vp9_worker_launch(worker); + } + + data += size; + ++tile_col; + } + + for (; i > 0; --i) { + VP9Worker *const worker = &pbi->tile_workers[i - 1]; + pbi->mb.corrupted |= !vp9_worker_sync(worker); + } + } + + { + const int final_worker = (tile_cols + num_workers - 1) % num_workers; + TileWorkerData *const tile_data = + (TileWorkerData*)pbi->tile_workers[final_worker].data1; + return vp9_reader_find_end(&tile_data->bit_reader); + } +} + static void check_sync_code(VP9_COMMON *cm, struct vp9_read_bit_buffer *rb) { - if (vp9_rb_read_literal(rb, 8) != SYNC_CODE_0 || - vp9_rb_read_literal(rb, 8) != SYNC_CODE_1 || - vp9_rb_read_literal(rb, 8) != SYNC_CODE_2) { + if (vp9_rb_read_literal(rb, 8) != VP9_SYNC_CODE_0 || + vp9_rb_read_literal(rb, 8) != VP9_SYNC_CODE_1 || + vp9_rb_read_literal(rb, 8) != VP9_SYNC_CODE_2) { vpx_internal_error(&cm->error, VPX_CODEC_UNSUP_BITSTREAM, "Invalid frame sync code"); } @@ -755,34 +1052,6 @@ static void error_handler(void *data, size_t bit_offset) { vpx_internal_error(&cm->error, VPX_CODEC_CORRUPT_FRAME, "Truncated packet"); } -static void setup_inter_inter(VP9_COMMON *cm) { - int i; - - cm->allow_comp_inter_inter = 0; - for (i = 1; i < ALLOWED_REFS_PER_FRAME; ++i) - cm->allow_comp_inter_inter |= - cm->ref_frame_sign_bias[i + 1] != cm->ref_frame_sign_bias[1]; - - if (cm->allow_comp_inter_inter) { - // which one is always-on in comp inter-inter? - if (cm->ref_frame_sign_bias[LAST_FRAME] == - cm->ref_frame_sign_bias[GOLDEN_FRAME]) { - cm->comp_fixed_ref = ALTREF_FRAME; - cm->comp_var_ref[0] = LAST_FRAME; - cm->comp_var_ref[1] = GOLDEN_FRAME; - } else if (cm->ref_frame_sign_bias[LAST_FRAME] == - cm->ref_frame_sign_bias[ALTREF_FRAME]) { - cm->comp_fixed_ref = GOLDEN_FRAME; - cm->comp_var_ref[0] = LAST_FRAME; - cm->comp_var_ref[1] = ALTREF_FRAME; - } else { - cm->comp_fixed_ref = LAST_FRAME; - cm->comp_var_ref[0] = GOLDEN_FRAME; - cm->comp_var_ref[1] = ALTREF_FRAME; - } - } -} - #define RESERVED \ if (vp9_rb_read_bit(rb)) \ vpx_internal_error(&cm->error, VPX_CODEC_UNSUP_BITSTREAM, \ @@ -791,13 +1060,12 @@ static void setup_inter_inter(VP9_COMMON *cm) { static size_t read_uncompressed_header(VP9D_COMP *pbi, struct vp9_read_bit_buffer *rb) { VP9_COMMON *const cm = &pbi->common; - MACROBLOCKD *const xd = &pbi->mb; size_t sz; int i; cm->last_frame_type = cm->frame_type; - if (vp9_rb_read_literal(rb, 2) != 0x2) + if (vp9_rb_read_literal(rb, 2) != VP9_FRAME_MARKER) vpx_internal_error(&cm->error, VPX_CODEC_UNSUP_BITSTREAM, "Invalid frame marker"); @@ -818,12 +1086,10 @@ static size_t read_uncompressed_header(VP9D_COMP *pbi, cm->error_resilient_mode = vp9_rb_read_bit(rb); if (cm->frame_type == KEY_FRAME) { - int csp; - check_sync_code(cm, rb); - csp = vp9_rb_read_literal(rb, 3); // colorspace - if (csp != 7) { // != sRGB + cm->color_space = vp9_rb_read_literal(rb, 3); // colorspace + if (cm->color_space != SRGB) { vp9_rb_read_bit(rb); // [16,235] (including xvycc) vs [0,255] range if (cm->version == 1) { cm->subsampling_x = vp9_rb_read_bit(rb); @@ -870,13 +1136,11 @@ static size_t read_uncompressed_header(VP9D_COMP *pbi, setup_frame_size_with_refs(pbi, rb); - xd->allow_high_precision_mv = vp9_rb_read_bit(rb); + cm->allow_high_precision_mv = vp9_rb_read_bit(rb); cm->mcomp_filter_type = read_interp_filter_type(rb); for (i = 0; i < ALLOWED_REFS_PER_FRAME; ++i) vp9_setup_scale_factors(cm, i); - - setup_inter_inter(cm); } } @@ -888,13 +1152,15 @@ static size_t read_uncompressed_header(VP9D_COMP *pbi, cm->frame_parallel_decoding_mode = 1; } + // This flag will be overridden by the call to vp9_setup_past_independence + // below, forcing the use of context 0 for those frame types. cm->frame_context_idx = vp9_rb_read_literal(rb, NUM_FRAME_CONTEXTS_LOG2); - if (cm->frame_type == KEY_FRAME || cm->error_resilient_mode || cm->intra_only) + if (frame_is_intra_only(cm) || cm->error_resilient_mode) vp9_setup_past_independence(cm); setup_loopfilter(&cm->lf, rb); - setup_quantization(pbi, rb); + setup_quantization(cm, &pbi->mb, rb); setup_segmentation(&cm->seg, rb); setup_tile_info(cm, rb); @@ -911,7 +1177,9 @@ static int read_compressed_header(VP9D_COMP *pbi, const uint8_t *data, size_t partition_size) { VP9_COMMON *const cm = &pbi->common; MACROBLOCKD *const xd = &pbi->mb; + FRAME_CONTEXT *const fc = &cm->fc; vp9_reader r; + int k; if (vp9_reader_init(&r, data, partition_size)) vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR, @@ -919,10 +1187,36 @@ static int read_compressed_header(VP9D_COMP *pbi, const uint8_t *data, cm->tx_mode = xd->lossless ? ONLY_4X4 : read_tx_mode(&r); if (cm->tx_mode == TX_MODE_SELECT) - read_tx_probs(&cm->fc.tx_probs, &r); - read_coef_probs(&cm->fc, cm->tx_mode, &r); + read_tx_probs(&fc->tx_probs, &r); + read_coef_probs(fc, cm->tx_mode, &r); + + for (k = 0; k < MBSKIP_CONTEXTS; ++k) + vp9_diff_update_prob(&r, &fc->mbskip_probs[k]); + + if (!frame_is_intra_only(cm)) { + nmv_context *const nmvc = &fc->nmvc; + int i, j; + + read_inter_mode_probs(fc, &r); + + if (cm->mcomp_filter_type == SWITCHABLE) + read_switchable_interp_probs(fc, &r); + + for (i = 0; i < INTRA_INTER_CONTEXTS; i++) + vp9_diff_update_prob(&r, &fc->intra_inter_prob[i]); + + read_comp_pred(cm, &r); + + for (j = 0; j < BLOCK_SIZE_GROUPS; j++) + for (i = 0; i < INTRA_MODES - 1; ++i) + vp9_diff_update_prob(&r, &fc->y_mode_prob[j][i]); + + for (j = 0; j < PARTITION_CONTEXTS; ++j) + for (i = 0; i < PARTITION_TYPES - 1; ++i) + vp9_diff_update_prob(&r, &fc->partition_prob[INTER_FRAME][j][i]); - vp9_prepare_read_mode_info(pbi, &r); + read_mv_probs(&r, nmvc, cm->allow_high_precision_mv); + } return vp9_reader_has_error(&r); } @@ -939,59 +1233,109 @@ void vp9_init_dequantizer(VP9_COMMON *cm) { } } +#ifdef NDEBUG +#define debug_check_frame_counts(cm) (void)0 +#else // !NDEBUG +// Counts should only be incremented when frame_parallel_decoding_mode and +// error_resilient_mode are disabled. +static void debug_check_frame_counts(const VP9_COMMON *const cm) { + FRAME_COUNTS zero_counts; + vp9_zero(zero_counts); + assert(cm->frame_parallel_decoding_mode || cm->error_resilient_mode); + assert(!memcmp(cm->counts.y_mode, zero_counts.y_mode, + sizeof(cm->counts.y_mode))); + assert(!memcmp(cm->counts.uv_mode, zero_counts.uv_mode, + sizeof(cm->counts.uv_mode))); + assert(!memcmp(cm->counts.partition, zero_counts.partition, + sizeof(cm->counts.partition))); + assert(!memcmp(cm->counts.coef, zero_counts.coef, + sizeof(cm->counts.coef))); + assert(!memcmp(cm->counts.eob_branch, zero_counts.eob_branch, + sizeof(cm->counts.eob_branch))); + assert(!memcmp(cm->counts.switchable_interp, zero_counts.switchable_interp, + sizeof(cm->counts.switchable_interp))); + assert(!memcmp(cm->counts.inter_mode, zero_counts.inter_mode, + sizeof(cm->counts.inter_mode))); + assert(!memcmp(cm->counts.intra_inter, zero_counts.intra_inter, + sizeof(cm->counts.intra_inter))); + assert(!memcmp(cm->counts.comp_inter, zero_counts.comp_inter, + sizeof(cm->counts.comp_inter))); + assert(!memcmp(cm->counts.single_ref, zero_counts.single_ref, + sizeof(cm->counts.single_ref))); + assert(!memcmp(cm->counts.comp_ref, zero_counts.comp_ref, + sizeof(cm->counts.comp_ref))); + assert(!memcmp(&cm->counts.tx, &zero_counts.tx, sizeof(cm->counts.tx))); + assert(!memcmp(cm->counts.mbskip, zero_counts.mbskip, + sizeof(cm->counts.mbskip))); + assert(!memcmp(&cm->counts.mv, &zero_counts.mv, sizeof(cm->counts.mv))); +} +#endif // NDEBUG + int vp9_decode_frame(VP9D_COMP *pbi, const uint8_t **p_data_end) { int i; VP9_COMMON *const cm = &pbi->common; MACROBLOCKD *const xd = &pbi->mb; const uint8_t *data = pbi->source; - const uint8_t *data_end = pbi->source + pbi->source_sz; + const uint8_t *const data_end = pbi->source + pbi->source_sz; - struct vp9_read_bit_buffer rb = { data, data_end, 0, - cm, error_handler }; + struct vp9_read_bit_buffer rb = { data, data_end, 0, cm, error_handler }; const size_t first_partition_size = read_uncompressed_header(pbi, &rb); const int keyframe = cm->frame_type == KEY_FRAME; - YV12_BUFFER_CONFIG *new_fb = &cm->yv12_fb[cm->new_fb_idx]; + const int tile_rows = 1 << cm->log2_tile_rows; + const int tile_cols = 1 << cm->log2_tile_cols; + YV12_BUFFER_CONFIG *const new_fb = get_frame_new_buffer(cm); if (!first_partition_size) { // showing a frame directly *p_data_end = data + 1; return 0; } - data += vp9_rb_bytes_read(&rb); - xd->corrupted = 0; - new_fb->corrupted = 0; - pbi->do_loopfilter_inline = - (cm->log2_tile_rows | cm->log2_tile_cols) == 0 && cm->lf.filter_level; if (!pbi->decoded_key_frame && !keyframe) return -1; + data += vp9_rb_bytes_read(&rb); if (!read_is_valid(data, first_partition_size, data_end)) vpx_internal_error(&cm->error, VPX_CODEC_CORRUPT_FRAME, "Truncated packet or corrupt header length"); - setup_plane_dequants(cm, &pbi->mb, cm->base_qindex); + pbi->do_loopfilter_inline = + (cm->log2_tile_rows | cm->log2_tile_cols) == 0 && cm->lf.filter_level; + if (pbi->do_loopfilter_inline && pbi->lf_worker.data1 == NULL) { + CHECK_MEM_ERROR(cm, pbi->lf_worker.data1, vpx_malloc(sizeof(LFWorkerData))); + pbi->lf_worker.hook = (VP9WorkerHook)vp9_loop_filter_worker; + if (pbi->oxcf.max_threads > 1 && !vp9_worker_reset(&pbi->lf_worker)) { + vpx_internal_error(&cm->error, VPX_CODEC_ERROR, + "Loop filter thread creation failed"); + } + } + + alloc_tile_storage(pbi, tile_cols); xd->mi_8x8 = cm->mi_grid_visible; - xd->mic_stream_ptr = cm->mi; xd->mode_info_stride = cm->mode_info_stride; + set_prev_mi(cm); - cm->fc = cm->frame_contexts[cm->frame_context_idx]; - - vp9_zero(cm->counts); - - new_fb->corrupted |= read_compressed_header(pbi, data, first_partition_size); - + setup_plane_dequants(cm, xd, cm->base_qindex); setup_block_dptrs(xd, cm->subsampling_x, cm->subsampling_y); - // clear out the coeff buffer + cm->fc = cm->frame_contexts[cm->frame_context_idx]; + vp9_zero(cm->counts); for (i = 0; i < MAX_MB_PLANE; ++i) vp9_zero(xd->plane[i].qcoeff); - set_prev_mi(cm); + xd->corrupted = 0; + new_fb->corrupted = read_compressed_header(pbi, data, first_partition_size); - *p_data_end = decode_tiles(pbi, data + first_partition_size); + // TODO(jzern): remove frame_parallel_decoding_mode restriction for + // single-frame tile decoding. + if (pbi->oxcf.max_threads > 1 && tile_rows == 1 && tile_cols > 1 && + cm->frame_parallel_decoding_mode) { + *p_data_end = decode_tiles_mt(pbi, data + first_partition_size); + } else { + *p_data_end = decode_tiles(pbi, data + first_partition_size); + } cm->last_width = cm->width; cm->last_height = cm->height; @@ -1009,10 +1353,12 @@ int vp9_decode_frame(VP9D_COMP *pbi, const uint8_t **p_data_end) { if (!cm->error_resilient_mode && !cm->frame_parallel_decoding_mode) { vp9_adapt_coef_probs(cm); - if (!keyframe && !cm->intra_only) { + if (!frame_is_intra_only(cm)) { vp9_adapt_mode_probs(cm); - vp9_adapt_mv_probs(cm, xd->allow_high_precision_mv); + vp9_adapt_mv_probs(cm, cm->allow_high_precision_mv); } + } else { + debug_check_frame_counts(cm); } if (cm->refresh_frame_context) diff --git a/source/libvpx/vp9/decoder/vp9_detokenize.c b/source/libvpx/vp9/decoder/vp9_detokenize.c index cd74a0b..6ecce28 100644 --- a/source/libvpx/vp9/decoder/vp9_detokenize.c +++ b/source/libvpx/vp9/decoder/vp9_detokenize.c @@ -61,15 +61,16 @@ static const vp9_prob cat6_prob[15] = { 254, 254, 254, 252, 249, 243, 230, 196, 177, 153, 140, 133, 130, 129, 0 }; -DECLARE_ALIGNED(16, extern const uint8_t, - vp9_pt_energy_class[MAX_ENTROPY_TOKENS]); -#define INCREMENT_COUNT(token) \ - do { \ - coef_counts[type][ref][band][pt] \ - [token >= TWO_TOKEN ? \ - (token == DCT_EOB_TOKEN ? DCT_EOB_MODEL_TOKEN : TWO_TOKEN) : \ - token]++; \ - token_cache[scan[c]] = vp9_pt_energy_class[token]; \ +#define INCREMENT_COUNT(token) \ + do { \ + if (!cm->frame_parallel_decoding_mode) { \ + ++coef_counts[type][ref][band][pt] \ + [token >= TWO_TOKEN ? \ + (token == DCT_EOB_TOKEN ? \ + DCT_EOB_MODEL_TOKEN : TWO_TOKEN) : \ + token]; \ + } \ + token_cache[scan[c]] = vp9_pt_energy_class[token]; \ } while (0) #define WRITE_COEF_CONTINUE(val, token) \ @@ -90,24 +91,21 @@ DECLARE_ALIGNED(16, extern const uint8_t, static int decode_coefs(VP9_COMMON *cm, const MACROBLOCKD *xd, vp9_reader *r, int block_idx, PLANE_TYPE type, int seg_eob, int16_t *qcoeff_ptr, - TX_SIZE tx_size, const int16_t *dq, - ENTROPY_CONTEXT *A, ENTROPY_CONTEXT *L) { - FRAME_CONTEXT *const fc = &cm->fc; + TX_SIZE tx_size, const int16_t *dq, int pt) { + const FRAME_CONTEXT *const fc = &cm->fc; FRAME_COUNTS *const counts = &cm->counts; - const int ref = is_inter_block(&xd->this_mi->mbmi); + const int ref = is_inter_block(&xd->mi_8x8[0]->mbmi); int band, c = 0; - vp9_prob (*coef_probs)[PREV_COEF_CONTEXTS][UNCONSTRAINED_NODES] = + const vp9_prob (*coef_probs)[PREV_COEF_CONTEXTS][UNCONSTRAINED_NODES] = fc->coef_probs[tx_size][type][ref]; vp9_prob coef_probs_full[COEF_BANDS][PREV_COEF_CONTEXTS][ENTROPY_NODES]; uint8_t load_map[COEF_BANDS][PREV_COEF_CONTEXTS] = { { 0 } }; - vp9_prob *prob; + const vp9_prob *prob; vp9_coeff_count_model *coef_counts = counts->coef[tx_size]; const int16_t *scan, *nb; - const uint8_t *band_translate; + const uint8_t *const band_translate = get_band_translate(tx_size); uint8_t token_cache[1024]; - int pt = get_entropy_context(xd, tx_size, type, block_idx, A, L, - &scan, &band_translate); - nb = vp9_get_coef_neighbors_handle(scan); + get_scan(xd, tx_size, type, block_idx, &scan, &nb); while (1) { int val; @@ -118,11 +116,12 @@ static int decode_coefs(VP9_COMMON *cm, const MACROBLOCKD *xd, pt = get_coef_context(nb, token_cache, c); band = get_coef_band(band_translate, c); prob = coef_probs[band][pt]; - counts->eob_branch[tx_size][type][ref][band][pt]++; + if (!cm->frame_parallel_decoding_mode) + ++counts->eob_branch[tx_size][type][ref][band][pt]; if (!vp9_read(r, prob[EOB_CONTEXT_NODE])) break; -SKIP_START: + SKIP_START: if (c >= seg_eob) break; if (c) @@ -203,47 +202,33 @@ SKIP_START: WRITE_COEF_CONTINUE(val, DCT_VAL_CATEGORY6); } - if (c < seg_eob) - coef_counts[type][ref][band][pt][DCT_EOB_MODEL_TOKEN]++; - + if (c < seg_eob) { + if (!cm->frame_parallel_decoding_mode) + ++coef_counts[type][ref][band][pt][DCT_EOB_MODEL_TOKEN]; + } return c; } -struct decode_block_args { - VP9D_COMP *pbi; - vp9_reader *r; - int *eobtotal; -}; - -static void decode_block(int plane, int block, BLOCK_SIZE plane_bsize, - TX_SIZE tx_size, void *argv) { - const struct decode_block_args* const arg = argv; - - // find the maximum eob for this transform size, adjusted by segment - MACROBLOCKD *xd = &arg->pbi->mb; - struct segmentation *seg = &arg->pbi->common.seg; - struct macroblockd_plane* pd = &xd->plane[plane]; - const int segment_id = xd->this_mi->mbmi.segment_id; - const int seg_eob = get_tx_eob(seg, segment_id, tx_size); - int aoff, loff, eob; - +int vp9_decode_block_tokens(VP9_COMMON *cm, MACROBLOCKD *xd, + int plane, int block, BLOCK_SIZE plane_bsize, + TX_SIZE tx_size, vp9_reader *r) { + struct macroblockd_plane *const pd = &xd->plane[plane]; + const int seg_eob = get_tx_eob(&cm->seg, xd->mi_8x8[0]->mbmi.segment_id, + tx_size); + int aoff, loff, eob, pt; txfrm_block_to_raster_xy(plane_bsize, tx_size, block, &aoff, &loff); + pt = get_entropy_context(tx_size, pd->above_context + aoff, + pd->left_context + loff); - eob = decode_coefs(&arg->pbi->common, xd, arg->r, block, + eob = decode_coefs(cm, xd, r, block, pd->plane_type, seg_eob, BLOCK_OFFSET(pd->qcoeff, block), - tx_size, pd->dequant, - pd->above_context + aoff, pd->left_context + loff); + tx_size, pd->dequant, pt); set_contexts(xd, pd, plane_bsize, tx_size, eob > 0, aoff, loff); pd->eobs[block] = eob; - *arg->eobtotal += eob; + return eob; } -int vp9_decode_tokens(VP9D_COMP *pbi, vp9_reader *r, BLOCK_SIZE bsize) { - int eobtotal = 0; - struct decode_block_args args = {pbi, r, &eobtotal}; - foreach_transformed_block(&pbi->mb, bsize, decode_block, &args); - return eobtotal; -} + diff --git a/source/libvpx/vp9/decoder/vp9_detokenize.h b/source/libvpx/vp9/decoder/vp9_detokenize.h index cf07c56..94dd8e4 100644 --- a/source/libvpx/vp9/decoder/vp9_detokenize.h +++ b/source/libvpx/vp9/decoder/vp9_detokenize.h @@ -15,6 +15,8 @@ #include "vp9/decoder/vp9_onyxd_int.h" #include "vp9/decoder/vp9_dboolhuff.h" -int vp9_decode_tokens(VP9D_COMP* pbi, vp9_reader *r, BLOCK_SIZE bsize); +int vp9_decode_block_tokens(VP9_COMMON *cm, MACROBLOCKD *xd, + int plane, int block, BLOCK_SIZE plane_bsize, + TX_SIZE tx_size, vp9_reader *r); #endif // VP9_DECODER_VP9_DETOKENIZE_H_ diff --git a/source/libvpx/vp9/decoder/vp9_dsubexp.c b/source/libvpx/vp9/decoder/vp9_dsubexp.c index ea877f6..fcca017 100644 --- a/source/libvpx/vp9/decoder/vp9_dsubexp.c +++ b/source/libvpx/vp9/decoder/vp9_dsubexp.c @@ -99,8 +99,8 @@ static int decode_term_subexp(vp9_reader *r, int k, int num_syms) { return word; } -void vp9_diff_update_prob(vp9_reader *r, int update_prob, vp9_prob* p) { - if (vp9_read(r, update_prob)) { +void vp9_diff_update_prob(vp9_reader *r, vp9_prob* p) { + if (vp9_read(r, DIFF_UPDATE_PROB)) { const int delp = decode_term_subexp(r, SUBEXP_PARAM, 255); *p = (vp9_prob)inv_remap_prob(delp, *p); } diff --git a/source/libvpx/vp9/decoder/vp9_dsubexp.h b/source/libvpx/vp9/decoder/vp9_dsubexp.h index 21ac313..aeb9399 100644 --- a/source/libvpx/vp9/decoder/vp9_dsubexp.h +++ b/source/libvpx/vp9/decoder/vp9_dsubexp.h @@ -14,6 +14,6 @@ #include "vp9/decoder/vp9_dboolhuff.h" -void vp9_diff_update_prob(vp9_reader *r, int update_prob, vp9_prob* p); +void vp9_diff_update_prob(vp9_reader *r, vp9_prob* p); #endif // VP9_DECODER_VP9_DSUBEXP_H_ diff --git a/source/libvpx/vp9/decoder/vp9_idct_blk.c b/source/libvpx/vp9/decoder/vp9_idct_blk.c deleted file mode 100644 index 395e636..0000000 --- a/source/libvpx/vp9/decoder/vp9_idct_blk.c +++ /dev/null @@ -1,152 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#include "vp9_rtcd.h" -#include "vp9/common/vp9_blockd.h" -#include "vp9/decoder/vp9_idct_blk.h" - -static void add_constant_residual(const int16_t diff, uint8_t *dest, int stride, - int width, int height) { - int r, c; - - for (r = 0; r < height; r++) { - for (c = 0; c < width; c++) - dest[c] = clip_pixel(diff + dest[c]); - - dest += stride; - } -} - -void vp9_add_constant_residual_8x8_c(const int16_t diff, uint8_t *dest, - int stride) { - add_constant_residual(diff, dest, stride, 8, 8); -} - -void vp9_add_constant_residual_16x16_c(const int16_t diff, uint8_t *dest, - int stride) { - add_constant_residual(diff, dest, stride, 16, 16); -} - -void vp9_add_constant_residual_32x32_c(const int16_t diff, uint8_t *dest, - int stride) { - add_constant_residual(diff, dest, stride, 32, 32); -} - -void vp9_iht_add_c(TX_TYPE tx_type, int16_t *input, uint8_t *dest, int stride, - int eob) { - if (tx_type == DCT_DCT) { - vp9_idct_add(input, dest, stride, eob); - } else { - vp9_short_iht4x4_add(input, dest, stride, tx_type); - vpx_memset(input, 0, 32); - } -} - -void vp9_iht_add_8x8_c(TX_TYPE tx_type, int16_t *input, uint8_t *dest, - int stride, int eob) { - if (tx_type == DCT_DCT) { - vp9_idct_add_8x8(input, dest, stride, eob); - } else { - if (eob > 0) { - vp9_short_iht8x8_add(input, dest, stride, tx_type); - vpx_memset(input, 0, 128); - } - } -} - -void vp9_idct_add_c(int16_t *input, uint8_t *dest, int stride, int eob) { - if (eob > 1) { - vp9_short_idct4x4_add(input, dest, stride); - vpx_memset(input, 0, 32); - } else { - vp9_short_idct4x4_1_add(input, dest, stride); - ((int *)input)[0] = 0; - } -} - -void vp9_idct_add_lossless_c(int16_t *input, uint8_t *dest, int stride, - int eob) { - if (eob > 1) { - vp9_short_iwalsh4x4_add(input, dest, stride); - vpx_memset(input, 0, 32); - } else { - vp9_short_iwalsh4x4_1_add_c(input, dest, stride); - ((int *)input)[0] = 0; - } -} - -void vp9_idct_add_8x8_c(int16_t *input, uint8_t *dest, int stride, int eob) { - // If dc is 1, then input[0] is the reconstructed value, do not need - // dequantization. Also, when dc is 1, dc is counted in eobs, namely eobs >=1. - - // The calculation can be simplified if there are not many non-zero dct - // coefficients. Use eobs to decide what to do. - // TODO(yunqingwang): "eobs = 1" case is also handled in vp9_short_idct8x8_c. - // Combine that with code here. - if (eob) { - if (eob == 1) { - // DC only DCT coefficient - vp9_short_idct8x8_1_add(input, dest, stride); - input[0] = 0; - } else if (eob <= 10) { - vp9_short_idct10_8x8_add(input, dest, stride); - vpx_memset(input, 0, 128); - } else { - vp9_short_idct8x8_add(input, dest, stride); - vpx_memset(input, 0, 128); - } - } -} - -void vp9_iht_add_16x16_c(TX_TYPE tx_type, int16_t *input, uint8_t *dest, - int stride, int eob) { - if (tx_type == DCT_DCT) { - vp9_idct_add_16x16(input, dest, stride, eob); - } else { - if (eob > 0) { - vp9_short_iht16x16_add(input, dest, stride, tx_type); - vpx_memset(input, 0, 512); - } - } -} - -void vp9_idct_add_16x16_c(int16_t *input, uint8_t *dest, int stride, int eob) { - /* The calculation can be simplified if there are not many non-zero dct - * coefficients. Use eobs to separate different cases. */ - if (eob) { - if (eob == 1) { - /* DC only DCT coefficient. */ - vp9_short_idct16x16_1_add(input, dest, stride); - input[0] = 0; - } else if (eob <= 10) { - vp9_short_idct10_16x16_add(input, dest, stride); - vpx_memset(input, 0, 512); - } else { - vp9_short_idct16x16_add(input, dest, stride); - vpx_memset(input, 0, 512); - } - } -} - -void vp9_idct_add_32x32_c(int16_t *input, uint8_t *dest, int stride, int eob) { - DECLARE_ALIGNED_ARRAY(16, int16_t, output, 1024); - - if (eob) { - if (eob == 1) { - vp9_short_idct1_32x32(input, output); - vp9_add_constant_residual_32x32(output[0], dest, stride); - input[0] = 0; - } else { - vp9_short_idct32x32_add(input, dest, stride); - vpx_memset(input, 0, 2048); - } - } -} - diff --git a/source/libvpx/vp9/decoder/vp9_idct_blk.h b/source/libvpx/vp9/decoder/vp9_idct_blk.h deleted file mode 100644 index 1810bd0..0000000 --- a/source/libvpx/vp9/decoder/vp9_idct_blk.h +++ /dev/null @@ -1,30 +0,0 @@ -/* - * Copyright (c) 2010 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - - -#ifndef VP9_DECODER_VP9_IDCT_BLK_H_ -#define VP9_DECODER_VP9_IDCT_BLK_H_ - -#include "vp9/common/vp9_blockd.h" - - -void vp9_idct_add_lossless_c(int16_t *input, unsigned char *dest, int stride, - int eob); - -void vp9_iht_add_c(TX_TYPE tx_type, int16_t *input, unsigned char *dest, - int stride, int eob); - -void vp9_iht_add_8x8_c(TX_TYPE tx_type, int16_t *input, unsigned char *dest, - int stride, int eob); - -void vp9_iht_add_16x16_c(TX_TYPE tx_type, int16_t *input, unsigned char *dest, - int stride, int eob); - -#endif // VP9_DECODER_VP9_IDCT_BLK_H_ diff --git a/source/libvpx/vp9/decoder/vp9_onyxd.h b/source/libvpx/vp9/decoder/vp9_onyxd.h index cd5b750..a4b9c24 100644 --- a/source/libvpx/vp9/decoder/vp9_onyxd.h +++ b/source/libvpx/vp9/decoder/vp9_onyxd.h @@ -8,8 +8,8 @@ * be found in the AUTHORS file in the root of the source tree. */ -#ifndef VP9_COMMON_VP9_ONYXD_H_ -#define VP9_COMMON_VP9_ONYXD_H_ +#ifndef VP9_DECODER_VP9_ONYXD_H_ +#define VP9_DECODER_VP9_ONYXD_H_ #ifdef __cplusplus extern "C" { @@ -40,7 +40,7 @@ typedef enum { void vp9_initialize_dec(); int vp9_receive_compressed_data(VP9D_PTR comp, - uint64_t size, const uint8_t **dest, + size_t size, const uint8_t **dest, int64_t time_stamp); int vp9_get_raw_frame(VP9D_PTR comp, YV12_BUFFER_CONFIG *sd, @@ -66,4 +66,4 @@ void vp9_remove_decompressor(VP9D_PTR comp); } #endif -#endif // VP9_COMMON_VP9_ONYXD_H_ +#endif // VP9_DECODER_VP9_ONYXD_H_ diff --git a/source/libvpx/vp9/decoder/vp9_onyxd_if.c b/source/libvpx/vp9/decoder/vp9_onyxd_if.c index 17d5def..5f970a3 100644 --- a/source/libvpx/vp9/decoder/vp9_onyxd_if.c +++ b/source/libvpx/vp9/decoder/vp9_onyxd_if.c @@ -65,13 +65,12 @@ static void recon_write_yuv_frame(const char *name, #endif #if WRITE_RECON_BUFFER == 2 void write_dx_frame_to_file(YV12_BUFFER_CONFIG *frame, int this_frame) { - // write the frame FILE *yframe; int i; char filename[255]; - sprintf(filename, "dx\\y%04d.raw", this_frame); + snprintf(filename, sizeof(filename)-1, "dx\\y%04d.raw", this_frame); yframe = fopen(filename, "wb"); for (i = 0; i < frame->y_height; i++) @@ -79,7 +78,7 @@ void write_dx_frame_to_file(YV12_BUFFER_CONFIG *frame, int this_frame) { frame->y_width, 1, yframe); fclose(yframe); - sprintf(filename, "dx\\u%04d.raw", this_frame); + snprintf(filename, sizeof(filename)-1, "dx\\u%04d.raw", this_frame); yframe = fopen(filename, "wb"); for (i = 0; i < frame->uv_height; i++) @@ -87,7 +86,7 @@ void write_dx_frame_to_file(YV12_BUFFER_CONFIG *frame, int this_frame) { frame->uv_width, 1, yframe); fclose(yframe); - sprintf(filename, "dx\\v%04d.raw", this_frame); + snprintf(filename, sizeof(filename)-1, "dx\\v%04d.raw", this_frame); yframe = fopen(filename, "wb"); for (i = 0; i < frame->uv_height; i++) @@ -142,20 +141,13 @@ VP9D_PTR vp9_create_decompressor(VP9D_CONFIG *oxcf) { cm->error.setjmp = 0; pbi->decoded_key_frame = 0; - if (pbi->oxcf.max_threads > 1) { - vp9_worker_init(&pbi->lf_worker); - pbi->lf_worker.data1 = vpx_malloc(sizeof(LFWorkerData)); - pbi->lf_worker.hook = (VP9WorkerHook)vp9_loop_filter_worker; - if (pbi->lf_worker.data1 == NULL || !vp9_worker_reset(&pbi->lf_worker)) { - vp9_remove_decompressor(pbi); - return NULL; - } - } + vp9_worker_init(&pbi->lf_worker); return pbi; } void vp9_remove_decompressor(VP9D_PTR ptr) { + int i; VP9D_COMP *const pbi = (VP9D_COMP *)ptr; if (!pbi) @@ -164,6 +156,16 @@ void vp9_remove_decompressor(VP9D_PTR ptr) { vp9_remove_common(&pbi->common); vp9_worker_end(&pbi->lf_worker); vpx_free(pbi->lf_worker.data1); + for (i = 0; i < pbi->num_tile_workers; ++i) { + VP9Worker *const worker = &pbi->tile_workers[i]; + vp9_worker_end(worker); + vpx_free(worker->data1); + vpx_free(worker->data2); + } + vpx_free(pbi->tile_workers); + vpx_free(pbi->mi_streams); + vpx_free(pbi->above_context[0]); + vpx_free(pbi->above_seg_context); vpx_free(pbi); } @@ -177,7 +179,6 @@ vpx_codec_err_t vp9_copy_reference_dec(VP9D_PTR ptr, YV12_BUFFER_CONFIG *sd) { VP9D_COMP *pbi = (VP9D_COMP *) ptr; VP9_COMMON *cm = &pbi->common; - int ref_fb_idx; /* TODO(jkoleszar): The decoder doesn't have any real knowledge of what the * encoder is using the frame buffers for. This is just a stub to keep the @@ -185,18 +186,15 @@ vpx_codec_err_t vp9_copy_reference_dec(VP9D_PTR ptr, * later commit that adds VP9-specific controls for this functionality. */ if (ref_frame_flag == VP9_LAST_FLAG) { - ref_fb_idx = cm->ref_frame_map[0]; + YV12_BUFFER_CONFIG *cfg = &cm->yv12_fb[cm->ref_frame_map[0]]; + if (!equal_dimensions(cfg, sd)) + vpx_internal_error(&cm->error, VPX_CODEC_ERROR, + "Incorrect buffer dimensions"); + else + vp8_yv12_copy_frame(cfg, sd); } else { vpx_internal_error(&cm->error, VPX_CODEC_ERROR, "Invalid reference frame"); - return cm->error.error_code; - } - - if (!equal_dimensions(&cm->yv12_fb[ref_fb_idx], sd)) { - vpx_internal_error(&cm->error, VPX_CODEC_ERROR, - "Incorrect buffer dimensions"); - } else { - vp8_yv12_copy_frame(&cm->yv12_fb[ref_fb_idx], sd); } return cm->error.error_code; @@ -214,13 +212,13 @@ vpx_codec_err_t vp9_set_reference_dec(VP9D_PTR ptr, VP9_REFFRAME ref_frame_flag, * vpxenc --test-decode functionality working, and will be replaced in a * later commit that adds VP9-specific controls for this functionality. */ - if (ref_frame_flag == VP9_LAST_FLAG) + if (ref_frame_flag == VP9_LAST_FLAG) { ref_fb_ptr = &pbi->common.active_ref_idx[0]; - else if (ref_frame_flag == VP9_GOLD_FLAG) + } else if (ref_frame_flag == VP9_GOLD_FLAG) { ref_fb_ptr = &pbi->common.active_ref_idx[1]; - else if (ref_frame_flag == VP9_ALT_FLAG) + } else if (ref_frame_flag == VP9_ALT_FLAG) { ref_fb_ptr = &pbi->common.active_ref_idx[2]; - else { + } else { vpx_internal_error(&pbi->common.error, VPX_CODEC_ERROR, "Invalid reference frame"); return pbi->common.error.error_code; @@ -268,7 +266,7 @@ static void swap_frame_buffers(VP9D_COMP *pbi) { ++ref_index; } - cm->frame_to_show = &cm->yv12_fb[cm->new_fb_idx]; + cm->frame_to_show = get_frame_new_buffer(cm); cm->fb_idx_ref_cnt[cm->new_fb_idx]--; // Invalidate these references until the next frame starts. @@ -277,7 +275,7 @@ static void swap_frame_buffers(VP9D_COMP *pbi) { } int vp9_receive_compressed_data(VP9D_PTR ptr, - uint64_t size, const uint8_t **psource, + size_t size, const uint8_t **psource, int64_t time_stamp) { VP9D_COMP *pbi = (VP9D_COMP *) ptr; VP9_COMMON *cm = &pbi->common; @@ -306,7 +304,7 @@ int vp9_receive_compressed_data(VP9D_PTR ptr, * thing to do here. */ if (cm->active_ref_idx[0] != INT_MAX) - cm->yv12_fb[cm->active_ref_idx[0]].corrupted = 1; + get_frame_ref_buffer(cm, 0)->corrupted = 1; } cm->new_fb_idx = get_free_fb(cm); @@ -323,7 +321,7 @@ int vp9_receive_compressed_data(VP9D_PTR ptr, * thing to do here. */ if (cm->active_ref_idx[0] != INT_MAX) - cm->yv12_fb[cm->active_ref_idx[0]].corrupted = 1; + get_frame_ref_buffer(cm, 0)->corrupted = 1; if (cm->fb_idx_ref_cnt[cm->new_fb_idx] > 0) cm->fb_idx_ref_cnt[cm->new_fb_idx]--; @@ -343,36 +341,33 @@ int vp9_receive_compressed_data(VP9D_PTR ptr, return retcode; } - { - swap_frame_buffers(pbi); + swap_frame_buffers(pbi); #if WRITE_RECON_BUFFER == 2 - if (cm->show_frame) - write_dx_frame_to_file(cm->frame_to_show, - cm->current_video_frame); - else - write_dx_frame_to_file(cm->frame_to_show, - cm->current_video_frame + 1000); + if (cm->show_frame) + write_dx_frame_to_file(cm->frame_to_show, + cm->current_video_frame); + else + write_dx_frame_to_file(cm->frame_to_show, + cm->current_video_frame + 1000); #endif - if (!pbi->do_loopfilter_inline) { - /* Apply the loop filter if appropriate. */ - vp9_loop_filter_frame(cm, &pbi->mb, pbi->common.lf.filter_level, 0, 0); - } + if (!pbi->do_loopfilter_inline) { + vp9_loop_filter_frame(cm, &pbi->mb, pbi->common.lf.filter_level, 0, 0); + } #if WRITE_RECON_BUFFER == 2 - if (cm->show_frame) - write_dx_frame_to_file(cm->frame_to_show, - cm->current_video_frame + 2000); - else - write_dx_frame_to_file(cm->frame_to_show, - cm->current_video_frame + 3000); + if (cm->show_frame) + write_dx_frame_to_file(cm->frame_to_show, + cm->current_video_frame + 2000); + else + write_dx_frame_to_file(cm->frame_to_show, + cm->current_video_frame + 3000); #endif - vp9_extend_frame_inner_borders(cm->frame_to_show, - cm->subsampling_x, - cm->subsampling_y); - } + vp9_extend_frame_inner_borders(cm->frame_to_show, + cm->subsampling_x, + cm->subsampling_y); #if WRITE_RECON_BUFFER == 1 if (cm->show_frame) @@ -398,6 +393,9 @@ int vp9_receive_compressed_data(VP9D_PTR ptr, cm->mi_grid_visible = cm->mi_grid_base + cm->mode_info_stride + 1; cm->prev_mi_grid_visible = cm->prev_mi_grid_base + cm->mode_info_stride + 1; + pbi->mb.mi_8x8 = cm->mi_grid_visible; + pbi->mb.mi_8x8[0] = cm->mi; + cm->current_video_frame++; } diff --git a/source/libvpx/vp9/decoder/vp9_onyxd_int.h b/source/libvpx/vp9/decoder/vp9_onyxd_int.h index a051971..83ea967 100644 --- a/source/libvpx/vp9/decoder/vp9_onyxd_int.h +++ b/source/libvpx/vp9/decoder/vp9_onyxd_int.h @@ -25,7 +25,7 @@ typedef struct VP9Decompressor { VP9D_CONFIG oxcf; const uint8_t *source; - uint32_t source_sz; + size_t source_sz; int64_t last_time_stamp; int ready_for_new_data; @@ -39,6 +39,16 @@ typedef struct VP9Decompressor { int do_loopfilter_inline; // apply loopfilter to available rows immediately VP9Worker lf_worker; + + VP9Worker *tile_workers; + int num_tile_workers; + + /* Each tile column has its own MODE_INFO stream. This array indexes them by + tile column index. */ + MODE_INFO **mi_streams; + + ENTROPY_CONTEXT *above_context[MAX_MB_PLANE]; + PARTITION_CONTEXT *above_seg_context; } VP9D_COMP; -#endif // VP9_DECODER_VP9_TREEREADER_H_ +#endif // VP9_DECODER_VP9_ONYXD_INT_H_ diff --git a/source/libvpx/vp9/decoder/vp9_read_bit_buffer.h b/source/libvpx/vp9/decoder/vp9_read_bit_buffer.h index c7fa3aa..41a6868 100644 --- a/source/libvpx/vp9/decoder/vp9_read_bit_buffer.h +++ b/source/libvpx/vp9/decoder/vp9_read_bit_buffer.h @@ -8,8 +8,8 @@ * be found in the AUTHORS file in the root of the source tree. */ -#ifndef VP9_READ_BIT_BUFFER_ -#define VP9_READ_BIT_BUFFER_ +#ifndef VP9_DECODER_VP9_READ_BIT_BUFFER_H_ +#define VP9_DECODER_VP9_READ_BIT_BUFFER_H_ #include <limits.h> @@ -57,4 +57,4 @@ static int vp9_rb_read_signed_literal(struct vp9_read_bit_buffer *rb, return vp9_rb_read_bit(rb) ? -value : value; } -#endif // VP9_READ_BIT_BUFFER_ +#endif // VP9_DECODER_VP9_READ_BIT_BUFFER_H_ diff --git a/source/libvpx/vp9/decoder/vp9_thread.c b/source/libvpx/vp9/decoder/vp9_thread.c index dc3b681..d953e72 100644 --- a/source/libvpx/vp9/decoder/vp9_thread.c +++ b/source/libvpx/vp9/decoder/vp9_thread.c @@ -29,7 +29,7 @@ extern "C" { //------------------------------------------------------------------------------ // simplistic pthread emulation layer -#include <process.h> +#include <process.h> // NOLINT // _beginthreadex requires __stdcall #define THREADFN unsigned int __stdcall @@ -145,9 +145,7 @@ static THREADFN thread_loop(void *ptr) { // thread loop pthread_cond_wait(&worker->condition_, &worker->mutex_); } if (worker->status_ == WORK) { - if (worker->hook) { - worker->had_error |= !worker->hook(worker->data1, worker->data2); - } + vp9_worker_execute(worker); worker->status_ = OK; } else if (worker->status_ == NOT_OK) { // finish the worker done = 1; @@ -178,7 +176,7 @@ static void change_state(VP9Worker* const worker, pthread_mutex_unlock(&worker->mutex_); } -#endif +#endif // CONFIG_MULTITHREAD //------------------------------------------------------------------------------ @@ -218,12 +216,17 @@ int vp9_worker_reset(VP9Worker* const worker) { return ok; } +void vp9_worker_execute(VP9Worker* const worker) { + if (worker->hook != NULL) { + worker->had_error |= !worker->hook(worker->data1, worker->data2); + } +} + void vp9_worker_launch(VP9Worker* const worker) { #if CONFIG_MULTITHREAD change_state(worker, WORK); #else - if (worker->hook) - worker->had_error |= !worker->hook(worker->data1, worker->data2); + vp9_worker_execute(worker); #endif } diff --git a/source/libvpx/vp9/decoder/vp9_thread.h b/source/libvpx/vp9/decoder/vp9_thread.h index a8f7e04..a624f3c 100644 --- a/source/libvpx/vp9/decoder/vp9_thread.h +++ b/source/libvpx/vp9/decoder/vp9_thread.h @@ -17,7 +17,7 @@ #ifndef VP9_DECODER_VP9_THREAD_H_ #define VP9_DECODER_VP9_THREAD_H_ -#include "vpx_config.h" +#include "./vpx_config.h" #if defined(__cplusplus) || defined(c_plusplus) extern "C" { @@ -27,7 +27,7 @@ extern "C" { #if defined(_WIN32) -#include <windows.h> +#include <windows.h> // NOLINT typedef HANDLE pthread_t; typedef CRITICAL_SECTION pthread_mutex_t; typedef struct { @@ -38,7 +38,7 @@ typedef struct { #else -#include <pthread.h> +#include <pthread.h> // NOLINT #endif /* _WIN32 */ #endif /* CONFIG_MULTITHREAD */ @@ -80,6 +80,11 @@ int vp9_worker_sync(VP9Worker* const worker); // hook/data1/data2 can be changed at any time before calling this function, // but not be changed afterward until the next call to vp9_worker_sync(). void vp9_worker_launch(VP9Worker* const worker); +// This function is similar to vp9_worker_launch() except that it calls the +// hook directly instead of using a thread. Convenient to bypass the thread +// mechanism while still using the VP9Worker structs. vp9_worker_sync() must +// still be called afterward (for error reporting). +void vp9_worker_execute(VP9Worker* const worker); // Kill the thread and terminate the object. To use the object again, one // must call vp9_worker_reset() again. void vp9_worker_end(VP9Worker* const worker); @@ -90,4 +95,4 @@ void vp9_worker_end(VP9Worker* const worker); } // extern "C" #endif -#endif /* VP9_DECODER_VP9_THREAD_H_ */ +#endif // VP9_DECODER_VP9_THREAD_H_ diff --git a/source/libvpx/vp9/decoder/vp9_treereader.h b/source/libvpx/vp9/decoder/vp9_treereader.h index 710cc4c..f612497 100644 --- a/source/libvpx/vp9/decoder/vp9_treereader.h +++ b/source/libvpx/vp9/decoder/vp9_treereader.h @@ -23,7 +23,8 @@ static int treed_read(vp9_reader *const r, /* !!! must return a 0 or 1 !!! */ const vp9_prob *const p) { register vp9_tree_index i = 0; - while ((i = t[ i + vp9_read(r, p[i >> 1])]) > 0); + while ((i = t[ i + vp9_read(r, p[i >> 1])]) > 0) + continue; return -i; } diff --git a/source/libvpx/vp9/decoder/x86/vp9_dequantize_sse2.c b/source/libvpx/vp9/decoder/x86/vp9_dequantize_sse2.c deleted file mode 100644 index 54ec67f..0000000 --- a/source/libvpx/vp9/decoder/x86/vp9_dequantize_sse2.c +++ /dev/null @@ -1,220 +0,0 @@ -/* - * Copyright (c) 2012 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#include <assert.h> -#include <emmintrin.h> // SSE2 -#include "./vpx_config.h" -#include "vpx/vpx_integer.h" -#include "vp9/common/vp9_common.h" -#include "vp9/common/vp9_idct.h" - -void vp9_add_constant_residual_8x8_sse2(const int16_t diff, uint8_t *dest, - int stride) { - uint8_t abs_diff; - __m128i d; - - // Prediction data. - __m128i p0 = _mm_loadl_epi64((const __m128i *)(dest + 0 * stride)); - __m128i p1 = _mm_loadl_epi64((const __m128i *)(dest + 1 * stride)); - __m128i p2 = _mm_loadl_epi64((const __m128i *)(dest + 2 * stride)); - __m128i p3 = _mm_loadl_epi64((const __m128i *)(dest + 3 * stride)); - __m128i p4 = _mm_loadl_epi64((const __m128i *)(dest + 4 * stride)); - __m128i p5 = _mm_loadl_epi64((const __m128i *)(dest + 5 * stride)); - __m128i p6 = _mm_loadl_epi64((const __m128i *)(dest + 6 * stride)); - __m128i p7 = _mm_loadl_epi64((const __m128i *)(dest + 7 * stride)); - - p0 = _mm_unpacklo_epi64(p0, p1); - p2 = _mm_unpacklo_epi64(p2, p3); - p4 = _mm_unpacklo_epi64(p4, p5); - p6 = _mm_unpacklo_epi64(p6, p7); - - // Clip diff value to [0, 255] range. Then, do addition or subtraction - // according to its sign. - if (diff >= 0) { - abs_diff = (diff > 255) ? 255 : diff; - d = _mm_shuffle_epi32(_mm_cvtsi32_si128((int)(abs_diff * 0x01010101u)), 0); - - p0 = _mm_adds_epu8(p0, d); - p2 = _mm_adds_epu8(p2, d); - p4 = _mm_adds_epu8(p4, d); - p6 = _mm_adds_epu8(p6, d); - } else { - abs_diff = (diff < -255) ? 255 : -diff; - d = _mm_shuffle_epi32(_mm_cvtsi32_si128((int)(abs_diff * 0x01010101u)), 0); - - p0 = _mm_subs_epu8(p0, d); - p2 = _mm_subs_epu8(p2, d); - p4 = _mm_subs_epu8(p4, d); - p6 = _mm_subs_epu8(p6, d); - } - - _mm_storel_epi64((__m128i *)(dest + 0 * stride), p0); - p0 = _mm_srli_si128(p0, 8); - _mm_storel_epi64((__m128i *)(dest + 1 * stride), p0); - - _mm_storel_epi64((__m128i *)(dest + 2 * stride), p2); - p2 = _mm_srli_si128(p2, 8); - _mm_storel_epi64((__m128i *)(dest + 3 * stride), p2); - - _mm_storel_epi64((__m128i *)(dest + 4 * stride), p4); - p4 = _mm_srli_si128(p4, 8); - _mm_storel_epi64((__m128i *)(dest + 5 * stride), p4); - - _mm_storel_epi64((__m128i *)(dest + 6 * stride), p6); - p6 = _mm_srli_si128(p6, 8); - _mm_storel_epi64((__m128i *)(dest + 7 * stride), p6); -} - -void vp9_add_constant_residual_16x16_sse2(const int16_t diff, uint8_t *dest, - int stride) { - uint8_t abs_diff; - __m128i d; - - // Prediction data. - __m128i p0 = _mm_load_si128((const __m128i *)(dest + 0 * stride)); - __m128i p1 = _mm_load_si128((const __m128i *)(dest + 1 * stride)); - __m128i p2 = _mm_load_si128((const __m128i *)(dest + 2 * stride)); - __m128i p3 = _mm_load_si128((const __m128i *)(dest + 3 * stride)); - __m128i p4 = _mm_load_si128((const __m128i *)(dest + 4 * stride)); - __m128i p5 = _mm_load_si128((const __m128i *)(dest + 5 * stride)); - __m128i p6 = _mm_load_si128((const __m128i *)(dest + 6 * stride)); - __m128i p7 = _mm_load_si128((const __m128i *)(dest + 7 * stride)); - __m128i p8 = _mm_load_si128((const __m128i *)(dest + 8 * stride)); - __m128i p9 = _mm_load_si128((const __m128i *)(dest + 9 * stride)); - __m128i p10 = _mm_load_si128((const __m128i *)(dest + 10 * stride)); - __m128i p11 = _mm_load_si128((const __m128i *)(dest + 11 * stride)); - __m128i p12 = _mm_load_si128((const __m128i *)(dest + 12 * stride)); - __m128i p13 = _mm_load_si128((const __m128i *)(dest + 13 * stride)); - __m128i p14 = _mm_load_si128((const __m128i *)(dest + 14 * stride)); - __m128i p15 = _mm_load_si128((const __m128i *)(dest + 15 * stride)); - - // Clip diff value to [0, 255] range. Then, do addition or subtraction - // according to its sign. - if (diff >= 0) { - abs_diff = (diff > 255) ? 255 : diff; - d = _mm_shuffle_epi32(_mm_cvtsi32_si128((int)(abs_diff * 0x01010101u)), 0); - - p0 = _mm_adds_epu8(p0, d); - p1 = _mm_adds_epu8(p1, d); - p2 = _mm_adds_epu8(p2, d); - p3 = _mm_adds_epu8(p3, d); - p4 = _mm_adds_epu8(p4, d); - p5 = _mm_adds_epu8(p5, d); - p6 = _mm_adds_epu8(p6, d); - p7 = _mm_adds_epu8(p7, d); - p8 = _mm_adds_epu8(p8, d); - p9 = _mm_adds_epu8(p9, d); - p10 = _mm_adds_epu8(p10, d); - p11 = _mm_adds_epu8(p11, d); - p12 = _mm_adds_epu8(p12, d); - p13 = _mm_adds_epu8(p13, d); - p14 = _mm_adds_epu8(p14, d); - p15 = _mm_adds_epu8(p15, d); - } else { - abs_diff = (diff < -255) ? 255 : -diff; - d = _mm_shuffle_epi32(_mm_cvtsi32_si128((int)(abs_diff * 0x01010101u)), 0); - - p0 = _mm_subs_epu8(p0, d); - p1 = _mm_subs_epu8(p1, d); - p2 = _mm_subs_epu8(p2, d); - p3 = _mm_subs_epu8(p3, d); - p4 = _mm_subs_epu8(p4, d); - p5 = _mm_subs_epu8(p5, d); - p6 = _mm_subs_epu8(p6, d); - p7 = _mm_subs_epu8(p7, d); - p8 = _mm_subs_epu8(p8, d); - p9 = _mm_subs_epu8(p9, d); - p10 = _mm_subs_epu8(p10, d); - p11 = _mm_subs_epu8(p11, d); - p12 = _mm_subs_epu8(p12, d); - p13 = _mm_subs_epu8(p13, d); - p14 = _mm_subs_epu8(p14, d); - p15 = _mm_subs_epu8(p15, d); - } - - // Store results - _mm_store_si128((__m128i *)(dest + 0 * stride), p0); - _mm_store_si128((__m128i *)(dest + 1 * stride), p1); - _mm_store_si128((__m128i *)(dest + 2 * stride), p2); - _mm_store_si128((__m128i *)(dest + 3 * stride), p3); - _mm_store_si128((__m128i *)(dest + 4 * stride), p4); - _mm_store_si128((__m128i *)(dest + 5 * stride), p5); - _mm_store_si128((__m128i *)(dest + 6 * stride), p6); - _mm_store_si128((__m128i *)(dest + 7 * stride), p7); - _mm_store_si128((__m128i *)(dest + 8 * stride), p8); - _mm_store_si128((__m128i *)(dest + 9 * stride), p9); - _mm_store_si128((__m128i *)(dest + 10 * stride), p10); - _mm_store_si128((__m128i *)(dest + 11 * stride), p11); - _mm_store_si128((__m128i *)(dest + 12 * stride), p12); - _mm_store_si128((__m128i *)(dest + 13 * stride), p13); - _mm_store_si128((__m128i *)(dest + 14 * stride), p14); - _mm_store_si128((__m128i *)(dest + 15 * stride), p15); -} - -void vp9_add_constant_residual_32x32_sse2(const int16_t diff, uint8_t *dest, - int stride) { - uint8_t abs_diff; - __m128i d; - int i = 8; - - if (diff >= 0) { - abs_diff = (diff > 255) ? 255 : diff; - d = _mm_shuffle_epi32(_mm_cvtsi32_si128((int)(abs_diff * 0x01010101u)), 0); - } else { - abs_diff = (diff < -255) ? 255 : -diff; - d = _mm_shuffle_epi32(_mm_cvtsi32_si128((int)(abs_diff * 0x01010101u)), 0); - } - - do { - // Prediction data. - __m128i p0 = _mm_load_si128((const __m128i *)(dest + 0 * stride)); - __m128i p1 = _mm_load_si128((const __m128i *)(dest + 0 * stride + 16)); - __m128i p2 = _mm_load_si128((const __m128i *)(dest + 1 * stride)); - __m128i p3 = _mm_load_si128((const __m128i *)(dest + 1 * stride + 16)); - __m128i p4 = _mm_load_si128((const __m128i *)(dest + 2 * stride)); - __m128i p5 = _mm_load_si128((const __m128i *)(dest + 2 * stride + 16)); - __m128i p6 = _mm_load_si128((const __m128i *)(dest + 3 * stride)); - __m128i p7 = _mm_load_si128((const __m128i *)(dest + 3 * stride + 16)); - - // Clip diff value to [0, 255] range. Then, do addition or subtraction - // according to its sign. - if (diff >= 0) { - p0 = _mm_adds_epu8(p0, d); - p1 = _mm_adds_epu8(p1, d); - p2 = _mm_adds_epu8(p2, d); - p3 = _mm_adds_epu8(p3, d); - p4 = _mm_adds_epu8(p4, d); - p5 = _mm_adds_epu8(p5, d); - p6 = _mm_adds_epu8(p6, d); - p7 = _mm_adds_epu8(p7, d); - } else { - p0 = _mm_subs_epu8(p0, d); - p1 = _mm_subs_epu8(p1, d); - p2 = _mm_subs_epu8(p2, d); - p3 = _mm_subs_epu8(p3, d); - p4 = _mm_subs_epu8(p4, d); - p5 = _mm_subs_epu8(p5, d); - p6 = _mm_subs_epu8(p6, d); - p7 = _mm_subs_epu8(p7, d); - } - - // Store results - _mm_store_si128((__m128i *)(dest + 0 * stride), p0); - _mm_store_si128((__m128i *)(dest + 0 * stride + 16), p1); - _mm_store_si128((__m128i *)(dest + 1 * stride), p2); - _mm_store_si128((__m128i *)(dest + 1 * stride + 16), p3); - _mm_store_si128((__m128i *)(dest + 2 * stride), p4); - _mm_store_si128((__m128i *)(dest + 2 * stride + 16), p5); - _mm_store_si128((__m128i *)(dest + 3 * stride), p6); - _mm_store_si128((__m128i *)(dest + 3 * stride + 16), p7); - - dest += 4 * stride; - } while (--i); -} diff --git a/source/libvpx/vp9/vp9_dx_iface.c b/source/libvpx/vp9/vp9_dx_iface.c index a70cdce..5dacab4 100644 --- a/source/libvpx/vp9/vp9_dx_iface.c +++ b/source/libvpx/vp9/vp9_dx_iface.c @@ -14,7 +14,7 @@ #include "vpx/vpx_decoder.h" #include "vpx/vp8dx.h" #include "vpx/internal/vpx_codec_internal.h" -#include "vpx_version.h" +#include "./vpx_version.h" #include "vp9/decoder/vp9_onyxd.h" #include "vp9/decoder/vp9_onyxd_int.h" #include "vp9/decoder/vp9_read_bit_buffer.h" @@ -172,9 +172,9 @@ static vpx_codec_err_t vp9_peek_si(const uint8_t *data, rb.bit_offset += 1; // show frame rb.bit_offset += 1; // error resilient - if (vp9_rb_read_literal(&rb, 8) != SYNC_CODE_0 || - vp9_rb_read_literal(&rb, 8) != SYNC_CODE_1 || - vp9_rb_read_literal(&rb, 8) != SYNC_CODE_2) { + if (vp9_rb_read_literal(&rb, 8) != VP9_SYNC_CODE_0 || + vp9_rb_read_literal(&rb, 8) != VP9_SYNC_CODE_1 || + vp9_rb_read_literal(&rb, 8) != VP9_SYNC_CODE_2) { return VPX_CODEC_UNSUP_BITSTREAM; } @@ -205,7 +205,6 @@ static vpx_codec_err_t vp9_peek_si(const uint8_t *data, static vpx_codec_err_t vp9_get_si(vpx_codec_alg_priv_t *ctx, vpx_codec_stream_info_t *si) { - unsigned int sz; if (si->sz >= sizeof(vp9_stream_info_t)) @@ -323,15 +322,20 @@ static vpx_codec_err_t decode_one(vpx_codec_alg_priv_t *ctx, vp9_ppflags_t flags = {0}; if (ctx->base.init_flags & VPX_CODEC_USE_POSTPROC) { - flags.post_proc_flag = ctx->postproc_cfg.post_proc_flag + flags.post_proc_flag = #if CONFIG_POSTPROC_VISUALIZER - - | ((ctx->dbg_color_ref_frame_flag != 0) ? VP9D_DEBUG_CLR_FRM_REF_BLKS : 0) - | ((ctx->dbg_color_mb_modes_flag != 0) ? VP9D_DEBUG_CLR_BLK_MODES : 0) - | ((ctx->dbg_color_b_modes_flag != 0) ? VP9D_DEBUG_CLR_BLK_MODES : 0) - | ((ctx->dbg_display_mv_flag != 0) ? VP9D_DEBUG_DRAW_MV : 0) + ((ctx->dbg_color_ref_frame_flag != 0) ? + VP9D_DEBUG_CLR_FRM_REF_BLKS : 0) + | ((ctx->dbg_color_mb_modes_flag != 0) ? + VP9D_DEBUG_CLR_BLK_MODES : 0) + | ((ctx->dbg_color_b_modes_flag != 0) ? + VP9D_DEBUG_CLR_BLK_MODES : 0) + | ((ctx->dbg_display_mv_flag != 0) ? + VP9D_DEBUG_DRAW_MV : 0) + | #endif -; + ctx->postproc_cfg.post_proc_flag; + flags.deblocking_level = ctx->postproc_cfg.deblocking_level; flags.noise_level = ctx->postproc_cfg.noise_level; #if CONFIG_POSTPROC_VISUALIZER @@ -496,8 +500,9 @@ static vpx_codec_err_t vp9_xma_get_mmap(const vpx_codec_ctx_t *ctx, mmap->sz = seg_iter->calc_sz(ctx->config.dec, ctx->init_flags); res = VPX_CODEC_OK; - } else + } else { res = VPX_CODEC_LIST_END; + } } while (!mmap->sz && res != VPX_CODEC_LIST_END); return res; @@ -542,7 +547,6 @@ static vpx_codec_err_t vp9_xma_set_mmap(vpx_codec_ctx_t *ctx, static vpx_codec_err_t set_reference(vpx_codec_alg_priv_t *ctx, int ctr_id, va_list args) { - vpx_ref_frame_t *data = va_arg(args, vpx_ref_frame_t *); if (data) { @@ -553,15 +557,14 @@ static vpx_codec_err_t set_reference(vpx_codec_alg_priv_t *ctx, return vp9_set_reference_dec(ctx->pbi, (VP9_REFFRAME)frame->frame_type, &sd); - } else + } else { return VPX_CODEC_INVALID_PARAM; - + } } static vpx_codec_err_t copy_reference(vpx_codec_alg_priv_t *ctx, int ctr_id, va_list args) { - vpx_ref_frame_t *data = va_arg(args, vpx_ref_frame_t *); if (data) { @@ -572,9 +575,9 @@ static vpx_codec_err_t copy_reference(vpx_codec_alg_priv_t *ctx, return vp9_copy_reference_dec(ctx->pbi, (VP9_REFFRAME)frame->frame_type, &sd); - } else + } else { return VPX_CODEC_INVALID_PARAM; - + } } static vpx_codec_err_t get_reference(vpx_codec_alg_priv_t *ctx, @@ -603,9 +606,9 @@ static vpx_codec_err_t set_postproc(vpx_codec_alg_priv_t *ctx, ctx->postproc_cfg_set = 1; ctx->postproc_cfg = *((vp8_postproc_cfg_t *)data); return VPX_CODEC_OK; - } else + } else { return VPX_CODEC_INVALID_PARAM; - + } #else return VPX_CODEC_INCAPABLE; #endif @@ -642,15 +645,15 @@ static vpx_codec_err_t get_last_ref_updates(vpx_codec_alg_priv_t *ctx, *update_info = pbi->refresh_frame_flags; return VPX_CODEC_OK; - } else + } else { return VPX_CODEC_INVALID_PARAM; + } } static vpx_codec_err_t get_frame_corrupted(vpx_codec_alg_priv_t *ctx, int ctrl_id, va_list args) { - int *corrupted = va_arg(args, int *); if (corrupted) { @@ -660,9 +663,9 @@ static vpx_codec_err_t get_frame_corrupted(vpx_codec_alg_priv_t *ctx, else return VPX_CODEC_ERROR; return VPX_CODEC_OK; - } else + } else { return VPX_CODEC_INVALID_PARAM; - + } } static vpx_codec_err_t set_invert_tile_order(vpx_codec_alg_priv_t *ctx, @@ -701,13 +704,13 @@ CODEC_INTERFACE(vpx_codec_vp9_dx) = { ctf_maps, /* vpx_codec_ctrl_fn_map_t *ctrl_maps; */ vp9_xma_get_mmap, /* vpx_codec_get_mmap_fn_t get_mmap; */ vp9_xma_set_mmap, /* vpx_codec_set_mmap_fn_t set_mmap; */ - { + { // NOLINT vp9_peek_si, /* vpx_codec_peek_si_fn_t peek_si; */ vp9_get_si, /* vpx_codec_get_si_fn_t get_si; */ vp9_decode, /* vpx_codec_decode_fn_t decode; */ vp9_get_frame, /* vpx_codec_frame_get_fn_t frame_get; */ }, - { + { // NOLINT /* encoder functions */ NOT_IMPLEMENTED, NOT_IMPLEMENTED, diff --git a/source/libvpx/vpx_scale/mips/dspr2/yv12extend_dspr2.c b/source/libvpx/vpx_scale/mips/dspr2/yv12extend_dspr2.c new file mode 100644 index 0000000..2c5cd1a --- /dev/null +++ b/source/libvpx/vpx_scale/mips/dspr2/yv12extend_dspr2.c @@ -0,0 +1,149 @@ +/* + * Copyright (c) 2013 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include <assert.h> + +#include "./vpx_config.h" +#include "vpx_scale/yv12config.h" +#include "vpx_mem/vpx_mem.h" +#include "vpx_scale/vpx_scale.h" + +#if HAVE_DSPR2 +static void extend_plane(uint8_t *const src, int src_stride, + int width, int height, + int extend_top, int extend_left, + int extend_bottom, int extend_right) { + int i, j; + uint8_t *left_src, *right_src; + uint8_t *left_dst_start, *right_dst_start; + uint8_t *left_dst, *right_dst; + uint8_t *top_src, *bot_src; + uint8_t *top_dst, *bot_dst; + uint32_t left_pix; + uint32_t right_pix; + uint32_t linesize; + + /* copy the left and right most columns out */ + left_src = src; + right_src = src + width - 1; + left_dst_start = src - extend_left; + right_dst_start = src + width; + + for (i = height; i--; ) { + left_dst = left_dst_start; + right_dst = right_dst_start; + + __asm__ __volatile__ ( + "lb %[left_pix], 0(%[left_src]) \n\t" + "lb %[right_pix], 0(%[right_src]) \n\t" + "replv.qb %[left_pix], %[left_pix] \n\t" + "replv.qb %[right_pix], %[right_pix] \n\t" + + : [left_pix] "=&r" (left_pix), [right_pix] "=&r" (right_pix) + : [left_src] "r" (left_src), [right_src] "r" (right_src) + ); + + for (j = extend_left/4; j--; ) { + __asm__ __volatile__ ( + "sw %[left_pix], 0(%[left_dst]) \n\t" + "sw %[right_pix], 0(%[right_dst]) \n\t" + + : + : [left_dst] "r" (left_dst), [left_pix] "r" (left_pix), + [right_dst] "r" (right_dst), [right_pix] "r" (right_pix) + ); + + left_dst += 4; + right_dst += 4; + } + + for (j = extend_left%4; j--; ) { + __asm__ __volatile__ ( + "sb %[left_pix], 0(%[left_dst]) \n\t" + "sb %[right_pix], 0(%[right_dst]) \n\t" + + : + : [left_dst] "r" (left_dst), [left_pix] "r" (left_pix), + [right_dst] "r" (right_dst), [right_pix] "r" (right_pix) + ); + + left_dst += 1; + right_dst += 1; + } + + left_src += src_stride; + right_src += src_stride; + left_dst_start += src_stride; + right_dst_start += src_stride; + } + + /* Now copy the top and bottom lines into each line of the respective + * borders + */ + top_src = src - extend_left; + bot_src = src + src_stride * (height - 1) - extend_left; + top_dst = src + src_stride * (-extend_top) - extend_left; + bot_dst = src + src_stride * (height) - extend_left; + linesize = extend_left + extend_right + width; + + for (i = 0; i < extend_top; i++) { + vpx_memcpy(top_dst, top_src, linesize); + top_dst += src_stride; + } + + for (i = 0; i < extend_bottom; i++) { + vpx_memcpy(bot_dst, bot_src, linesize); + bot_dst += src_stride; + } +} + +static void extend_frame(YV12_BUFFER_CONFIG *const ybf, + int subsampling_x, int subsampling_y, + int ext_size) { + const int c_w = (ybf->y_crop_width + subsampling_x) >> subsampling_x; + const int c_h = (ybf->y_crop_height + subsampling_y) >> subsampling_y; + const int c_et = ext_size >> subsampling_y; + const int c_el = ext_size >> subsampling_x; + const int c_eb = (ext_size + ybf->y_height - ybf->y_crop_height + + subsampling_y) >> subsampling_y; + const int c_er = (ext_size + ybf->y_width - ybf->y_crop_width + + subsampling_x) >> subsampling_x; + + assert(ybf->y_height - ybf->y_crop_height < 16); + assert(ybf->y_width - ybf->y_crop_width < 16); + assert(ybf->y_height - ybf->y_crop_height >= 0); + assert(ybf->y_width - ybf->y_crop_width >= 0); + + extend_plane(ybf->y_buffer, ybf->y_stride, + ybf->y_crop_width, ybf->y_crop_height, + ext_size, ext_size, + ext_size + ybf->y_height - ybf->y_crop_height, + ext_size + ybf->y_width - ybf->y_crop_width); + + extend_plane(ybf->u_buffer, ybf->uv_stride, + c_w, c_h, c_et, c_el, c_eb, c_er); + + extend_plane(ybf->v_buffer, ybf->uv_stride, + c_w, c_h, c_et, c_el, c_eb, c_er); +} + +void vp9_extend_frame_borders_dspr2(YV12_BUFFER_CONFIG *ybf, + int subsampling_x, int subsampling_y) { + extend_frame(ybf, subsampling_x, subsampling_y, ybf->border); +} + +void vp9_extend_frame_inner_borders_dspr2(YV12_BUFFER_CONFIG *ybf, + int subsampling_x, + int subsampling_y) { + const int inner_bw = (ybf->border > VP9INNERBORDERINPIXELS) ? + VP9INNERBORDERINPIXELS : ybf->border; + extend_frame(ybf, subsampling_x, subsampling_y, inner_bw); +} +#endif diff --git a/source/libvpx/vpx_scale/vpx_scale.mk b/source/libvpx/vpx_scale/vpx_scale.mk index 76c11e7..50d3e9d 100644 --- a/source/libvpx/vpx_scale/vpx_scale.mk +++ b/source/libvpx/vpx_scale/vpx_scale.mk @@ -16,6 +16,9 @@ SCALE_SRCS-$(HAVE_NEON) += arm/neon/vp8_vpxyv12_copysrcframe_func_neon$(ASM) SCALE_SRCS-$(HAVE_NEON) += arm/neon/vp8_vpxyv12_extendframeborders_neon$(ASM) SCALE_SRCS-$(HAVE_NEON) += arm/neon/yv12extend_arm.c +#mips(dspr2) +SCALE_SRCS-$(HAVE_DSPR2) += mips/dspr2/yv12extend_dspr2.c + SCALE_SRCS-no += $(SCALE_SRCS_REMOVE-yes) $(eval $(call asm_offsets_template,\ diff --git a/source/libvpx/vpx_scale/vpx_scale_rtcd.sh b/source/libvpx/vpx_scale/vpx_scale_rtcd.sh index ea7b0e2..a5faf11 100755 --- a/source/libvpx/vpx_scale/vpx_scale_rtcd.sh +++ b/source/libvpx/vpx_scale/vpx_scale_rtcd.sh @@ -27,8 +27,8 @@ specialize vpx_yv12_copy_y neon if [ "$CONFIG_VP9" = "yes" ]; then prototype void vp9_extend_frame_borders "struct yv12_buffer_config *ybf, int subsampling_x, int subsampling_y" - specialize vp9_extend_frame_borders + specialize vp9_extend_frame_borders dspr2 prototype void vp9_extend_frame_inner_borders "struct yv12_buffer_config *ybf, int subsampling_x, int subsampling_y" - specialize vp9_extend_frame_inner_borders_c + specialize vp9_extend_frame_inner_borders dspr2 fi diff --git a/source/libvpx/vpxenc.c b/source/libvpx/vpxenc.c index 0c742ca..aa99c6b 100644 --- a/source/libvpx/vpxenc.c +++ b/source/libvpx/vpxenc.c @@ -45,8 +45,8 @@ #include "vpx_ports/vpx_timer.h" #include "tools_common.h" #include "y4minput.h" -#include "libmkv/EbmlWriter.h" -#include "libmkv/EbmlIDs.h" +#include "third_party/libmkv/EbmlWriter.h" +#include "third_party/libmkv/EbmlIDs.h" #include "third_party/libyuv/include/libyuv/scale.h" /* Need special handling of these functions on Windows */ @@ -725,14 +725,12 @@ write_webm_file_header(EbmlGlobal *glob, { unsigned int pixelWidth = cfg->g_w; unsigned int pixelHeight = cfg->g_h; - float frameRate = (float)fps->num / (float)fps->den; EbmlLoc videoStart; Ebml_StartSubElement(glob, &videoStart, Video); Ebml_SerializeUnsigned(glob, PixelWidth, pixelWidth); Ebml_SerializeUnsigned(glob, PixelHeight, pixelHeight); Ebml_SerializeUnsigned(glob, StereoMode, stereo_fmt); - Ebml_SerializeFloat(glob, FrameRate, frameRate); Ebml_EndSubElement(glob, &videoStart); } Ebml_EndSubElement(glob, &start); /* Track Entry */ @@ -883,10 +881,10 @@ static unsigned int murmur(const void *key, int len, unsigned int seed) { while (len >= 4) { unsigned int k; - k = data[0]; - k |= data[1] << 8; - k |= data[2] << 16; - k |= data[3] << 24; + k = (unsigned int)data[0]; + k |= (unsigned int)data[1] << 8; + k |= (unsigned int)data[2] << 16; + k |= (unsigned int)data[3] << 24; k *= m; k ^= k >> r; @@ -1765,9 +1763,13 @@ static void parse_global_config(struct global_config *global, char **argv) { /* Validate global config */ if (global->passes == 0) { +#if CONFIG_VP9_ENCODER // Make default VP9 passes = 2 until there is a better quality 1-pass // encoder global->passes = (global->codec->iface == vpx_codec_vp9_cx ? 2 : 1); +#else + global->passes = 1; +#endif } if (global->pass) { @@ -2671,8 +2673,8 @@ int main(int argc, const char **argv_) { fprintf(stderr, "%7"PRId64" %s %.2f %s ", cx_time > 9999999 ? cx_time / 1000 : cx_time, cx_time > 9999999 ? "ms" : "us", - fps >= 1.0 ? fps : 1000.0 / fps, - fps >= 1.0 ? "fps" : "ms/f"); + fps >= 1.0 ? fps : fps * 60, + fps >= 1.0 ? "fps" : "fpm"); print_time("ETA", estimated_time_left); fprintf(stderr, "\033[K"); } @@ -2782,16 +2784,17 @@ int main(int argc, const char **argv_) { /* TODO(jkoleszar): This doesn't belong in this executable. Do it for now, * to match some existing utilities. */ - FOREACH_STREAM({ - FILE *f = fopen("opsnr.stt", "a"); - if (stream->mismatch_seen) { - fprintf(f, "First mismatch occurred in frame %d\n", - stream->mismatch_seen); - } else { - fprintf(f, "No mismatch detected in recon buffers\n"); - } - fclose(f); - }); + if (!(global.pass == 1 && global.passes == 2)) + FOREACH_STREAM({ + FILE *f = fopen("opsnr.stt", "a"); + if (stream->mismatch_seen) { + fprintf(f, "First mismatch occurred in frame %d\n", + stream->mismatch_seen); + } else { + fprintf(f, "No mismatch detected in recon buffers\n"); + } + fclose(f); + }); #endif vpx_img_free(&raw); |