// libjingle // Copyright 2010 Google Inc. // // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are met: // // 1. Redistributions of source code must retain the above copyright notice, // this list of conditions and the following disclaimer. // 2. Redistributions in binary form must reproduce the above copyright notice, // this list of conditions and the following disclaimer in the documentation // and/or other materials provided with the distribution. // 3. The name of the author may not be used to endorse or promote products // derived from this software without specific prior written permission. // // THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED // WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF // MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO // EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; // OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, // WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR // OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF // ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // // Declaration of abstract class VideoCapturer #ifndef TALK_MEDIA_BASE_VIDEOCAPTURER_H_ #define TALK_MEDIA_BASE_VIDEOCAPTURER_H_ #include #include #include "talk/media/base/mediachannel.h" #include "talk/media/base/videoadapter.h" #include "talk/media/base/videocommon.h" #include "talk/media/base/videoframefactory.h" #include "talk/media/devices/devicemanager.h" #include "webrtc/base/basictypes.h" #include "webrtc/base/criticalsection.h" #include "webrtc/base/messagehandler.h" #include "webrtc/base/rollingaccumulator.h" #include "webrtc/base/scoped_ptr.h" #include "webrtc/base/sigslot.h" #include "webrtc/base/thread.h" #include "webrtc/base/timing.h" namespace cricket { class VideoProcessor; // Current state of the capturer. // TODO(hellner): CS_NO_DEVICE is an error code not a capture state. Separate // error codes and states. enum CaptureState { CS_STOPPED, // The capturer has been stopped or hasn't started yet. CS_STARTING, // The capturer is in the process of starting. Note, it may // still fail to start. CS_RUNNING, // The capturer has been started successfully and is now // capturing. CS_PAUSED, // The capturer has been paused. CS_FAILED, // The capturer failed to start. CS_NO_DEVICE, // The capturer has no device and consequently failed to start. }; class VideoFrame; struct CapturedFrame { static const uint32 kFrameHeaderSize = 40; // Size from width to data_size. static const uint32 kUnknownDataSize = 0xFFFFFFFF; CapturedFrame(); // Get the number of bytes of the frame data. If data_size is known, return // it directly. Otherwise, calculate the size based on width, height, and // fourcc. Return true if succeeded. bool GetDataSize(uint32* size) const; // The width and height of the captured frame could be different from those // of VideoFormat. Once the first frame is captured, the width, height, // fourcc, pixel_width, and pixel_height should keep the same over frames. int width; // in number of pixels int height; // in number of pixels uint32 fourcc; // compression uint32 pixel_width; // width of a pixel, default is 1 uint32 pixel_height; // height of a pixel, default is 1 int64 elapsed_time; // elapsed time since the creation of the frame // source (that is, the camera), in nanoseconds. int64 time_stamp; // timestamp of when the frame was captured, in unix // time with nanosecond units. uint32 data_size; // number of bytes of the frame data int rotation; // rotation in degrees of the frame (0, 90, 180, 270) void* data; // pointer to the frame data. This object allocates the // memory or points to an existing memory. private: DISALLOW_COPY_AND_ASSIGN(CapturedFrame); }; // VideoCapturer is an abstract class that defines the interfaces for video // capturing. The subclasses implement the video capturer for various types of // capturers and various platforms. // // The captured frames may need to be adapted (for example, cropping). // Video adaptation is built into and enabled by default. After a frame has // been captured from the device, it is sent to the video adapter, then video // processors, then out to the encoder. // // Programming model: // Create an object of a subclass of VideoCapturer // Initialize // SignalStateChange.connect() // SignalFrameCaptured.connect() // Find the capture format for Start() by either calling GetSupportedFormats() // and selecting one of the supported or calling GetBestCaptureFormat(). // video_adapter()->OnOutputFormatRequest(desired_encoding_format) // Start() // GetCaptureFormat() optionally // Stop() // // Assumption: // The Start() and Stop() methods are called by a single thread (E.g., the // media engine thread). Hence, the VideoCapture subclasses dont need to be // thread safe. // class VideoCapturer : public sigslot::has_slots<>, public rtc::MessageHandler { public: typedef std::vector VideoProcessors; // All signals are marshalled to |thread| or the creating thread if // none is provided. VideoCapturer(); explicit VideoCapturer(rtc::Thread* thread); virtual ~VideoCapturer() {} // Gets the id of the underlying device, which is available after the capturer // is initialized. Can be used to determine if two capturers reference the // same device. const std::string& GetId() const { return id_; } // Get the capture formats supported by the video capturer. The supported // formats are non empty after the device has been opened successfully. const std::vector* GetSupportedFormats() const; // Get the best capture format for the desired format. The best format is the // same as one of the supported formats except that the frame interval may be // different. If the application asks for 16x9 and the camera does not support // 16x9 HD or the application asks for 16x10, we find the closest 4x3 and then // crop; Otherwise, we find what the application asks for. Note that we assume // that for HD, the desired format is always 16x9. The subclasses can override // the default implementation. // Parameters // desired: the input desired format. If desired.fourcc is not kAnyFourcc, // the best capture format has the exactly same fourcc. Otherwise, // the best capture format uses a fourcc in GetPreferredFourccs(). // best_format: the output of the best capture format. // Return false if there is no such a best format, that is, the desired format // is not supported. virtual bool GetBestCaptureFormat(const VideoFormat& desired, VideoFormat* best_format); // TODO(hellner): deprecate (make private) the Start API in favor of this one. // Also remove CS_STARTING as it is implied by the return // value of StartCapturing(). bool StartCapturing(const VideoFormat& capture_format); // Start the video capturer with the specified capture format. // Parameter // capture_format: The caller got this parameter by either calling // GetSupportedFormats() and selecting one of the supported // or calling GetBestCaptureFormat(). // Return // CS_STARTING: The capturer is trying to start. Success or failure will // be notified via the |SignalStateChange| callback. // CS_RUNNING: if the capturer is started and capturing. // CS_PAUSED: Will never be returned. // CS_FAILED: if the capturer failes to start.. // CS_NO_DEVICE: if the capturer has no device and fails to start. virtual CaptureState Start(const VideoFormat& capture_format) = 0; // Sets the desired aspect ratio. If the capturer is capturing at another // aspect ratio it will crop the width or the height so that asked for // aspect ratio is acheived. Note that ratio_w and ratio_h do not need to be // relatively prime. void UpdateAspectRatio(int ratio_w, int ratio_h); void ClearAspectRatio(); // Get the current capture format, which is set by the Start() call. // Note that the width and height of the captured frames may differ from the // capture format. For example, the capture format is HD but the captured // frames may be smaller than HD. const VideoFormat* GetCaptureFormat() const { return capture_format_.get(); } // Pause the video capturer. virtual bool Pause(bool paused); // Stop the video capturer. virtual void Stop() = 0; // Check if the video capturer is running. virtual bool IsRunning() = 0; // Restart the video capturer with the new |capture_format|. // Default implementation stops and starts the capturer. virtual bool Restart(const VideoFormat& capture_format); // TODO(thorcarpenter): This behavior of keeping the camera open just to emit // black frames is a total hack and should be fixed. // When muting, produce black frames then pause the camera. // When unmuting, start the camera. Camera starts unmuted. virtual bool MuteToBlackThenPause(bool muted); virtual bool IsMuted() const { return muted_; } CaptureState capture_state() const { return capture_state_; } // Adds a video processor that will be applied on VideoFrames returned by // |SignalVideoFrame|. Multiple video processors can be added. The video // processors will be applied in the order they were added. void AddVideoProcessor(VideoProcessor* video_processor); // Removes the |video_processor| from the list of video processors or // returns false. bool RemoveVideoProcessor(VideoProcessor* video_processor); // Returns true if the capturer is screencasting. This can be used to // implement screencast specific behavior. virtual bool IsScreencast() const = 0; // Caps the VideoCapturer's format according to max_format. It can e.g. be // used to prevent cameras from capturing at a resolution or framerate that // the capturer is capable of but not performing satisfactorily at. // The capping is an upper bound for each component of the capturing format. // The fourcc component is ignored. void ConstrainSupportedFormats(const VideoFormat& max_format); void set_enable_camera_list(bool enable_camera_list) { enable_camera_list_ = enable_camera_list; } bool enable_camera_list() { return enable_camera_list_; } // Enable scaling to ensure square pixels. void set_square_pixel_aspect_ratio(bool square_pixel_aspect_ratio) { square_pixel_aspect_ratio_ = square_pixel_aspect_ratio; } bool square_pixel_aspect_ratio() { return square_pixel_aspect_ratio_; } // Signal all capture state changes that are not a direct result of calling // Start(). sigslot::signal2 SignalStateChange; // Frame callbacks are multithreaded to allow disconnect and connect to be // called concurrently. It also ensures that it is safe to call disconnect // at any time which is needed since the signal may be called from an // unmarshalled thread owned by the VideoCapturer. // Signal the captured frame to downstream. sigslot::signal2 SignalFrameCaptured; // Signal the captured and possibly adapted frame to downstream consumers // such as the encoder. sigslot::signal2 SignalVideoFrame; const VideoProcessors& video_processors() const { return video_processors_; } // If 'screencast_max_pixels' is set greater than zero, screencasts will be // scaled to be no larger than this value. // If set to zero, the max pixels will be limited to // Retina MacBookPro 15" resolution of 2880 x 1800. // For high fps, maximum pixels limit is set based on common 24" monitor // resolution of 2048 x 1280. int screencast_max_pixels() const { return screencast_max_pixels_; } void set_screencast_max_pixels(int p) { screencast_max_pixels_ = rtc::_max(0, p); } // If true, run video adaptation. By default, video adaptation is enabled // and users must call video_adapter()->OnOutputFormatRequest() // to receive frames. bool enable_video_adapter() const { return enable_video_adapter_; } void set_enable_video_adapter(bool enable_video_adapter) { enable_video_adapter_ = enable_video_adapter; } CoordinatedVideoAdapter* video_adapter() { return &video_adapter_; } const CoordinatedVideoAdapter* video_adapter() const { return &video_adapter_; } // Takes ownership. void set_frame_factory(VideoFrameFactory* frame_factory) { frame_factory_.reset(frame_factory); } // Gets statistics for tracked variables recorded since the last call to // GetStats. Note that calling GetStats resets any gathered data so it // should be called only periodically to log statistics. void GetStats(VariableInfo* adapt_drop_stats, VariableInfo* effect_drop_stats, VariableInfo* frame_time_stats, VideoFormat* last_captured_frame_format); protected: // Callback attached to SignalFrameCaptured where SignalVideoFrames is called. void OnFrameCaptured(VideoCapturer* video_capturer, const CapturedFrame* captured_frame); void SetCaptureState(CaptureState state); // Marshals SignalStateChange onto thread_. void OnMessage(rtc::Message* message); // subclasses override this virtual method to provide a vector of fourccs, in // order of preference, that are expected by the media engine. virtual bool GetPreferredFourccs(std::vector* fourccs) = 0; // mutators to set private attributes void SetId(const std::string& id) { id_ = id; } void SetCaptureFormat(const VideoFormat* format) { capture_format_.reset(format ? new VideoFormat(*format) : NULL); if (capture_format_) { ASSERT(capture_format_->interval > 0 && "Capture format expected to have positive interval."); // Video adapter really only cares about capture format interval. video_adapter_.SetInputFormat(*capture_format_); } } void SetSupportedFormats(const std::vector& formats); VideoFrameFactory* frame_factory() { return frame_factory_.get(); } private: void Construct(); // Get the distance between the desired format and the supported format. // Return the max distance if they mismatch. See the implementation for // details. int64 GetFormatDistance(const VideoFormat& desired, const VideoFormat& supported); // Convert captured frame to readable string for LOG messages. std::string ToString(const CapturedFrame* frame) const; // Applies all registered processors. If any of the processors signal that // the frame should be dropped the return value will be false. Note that // this frame should be dropped as it has not applied all processors. bool ApplyProcessors(VideoFrame* video_frame); // Updates filtered_supported_formats_ so that it contains the formats in // supported_formats_ that fulfill all applied restrictions. void UpdateFilteredSupportedFormats(); // Returns true if format doesn't fulfill all applied restrictions. bool ShouldFilterFormat(const VideoFormat& format) const; void UpdateStats(const CapturedFrame* captured_frame); // Helper function to save statistics on the current data from a // RollingAccumulator into stats. template static void GetVariableSnapshot( const rtc::RollingAccumulator& data, VariableInfo* stats); rtc::Thread* thread_; std::string id_; CaptureState capture_state_; rtc::scoped_ptr frame_factory_; rtc::scoped_ptr capture_format_; std::vector supported_formats_; rtc::scoped_ptr max_format_; std::vector filtered_supported_formats_; int ratio_w_; // View resolution. e.g. 1280 x 720. int ratio_h_; bool enable_camera_list_; bool square_pixel_aspect_ratio_; // Enable scaling to square pixels. int scaled_width_; // Current output size from ComputeScale. int scaled_height_; int screencast_max_pixels_; // Downscale screencasts further if requested. bool muted_; int black_frame_count_down_; bool enable_video_adapter_; CoordinatedVideoAdapter video_adapter_; rtc::Timing frame_length_time_reporter_; rtc::CriticalSection frame_stats_crit_; int adapt_frame_drops_; rtc::RollingAccumulator adapt_frame_drops_data_; int effect_frame_drops_; rtc::RollingAccumulator effect_frame_drops_data_; double previous_frame_time_; rtc::RollingAccumulator frame_time_data_; // The captured frame format before potential adapation. VideoFormat last_captured_frame_format_; rtc::CriticalSection crit_; VideoProcessors video_processors_; DISALLOW_COPY_AND_ASSIGN(VideoCapturer); }; } // namespace cricket #endif // TALK_MEDIA_BASE_VIDEOCAPTURER_H_