Commit c3315986 authored by zijiehe, committed by Commit Bot

[Chromoting] Implement down mixing in AudioPump

This change implements down mixing logic in AudioPump.

It adds support for 3 / 4 / 5 / 6 / 7 / 8 channels in AudioPacket and down mixes the
packet into stereo before encoding. The newly added logic is executed only when
multichannel output is returned by the Windows API.

R=SergeyU@chromium.org, JoeDow@chromium.org
BUG=669070

Review-Url: https://codereview.chromium.org/2903153004
Cr-Commit-Position: refs/heads/master@{#478488}
parent 742eb786
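Before the diff itself, here is a minimal, hypothetical sketch of the down-mixing path this change adds: interleaved 16-bit multichannel samples are wrapped in a media::AudioBus, mixed to stereo with media::ChannelMixer, and re-interleaved. The ChannelMixer / AudioBus / SignedInt16SampleTypeTraits calls are the ones used by the CL below; the helper name and the std::vector plumbing are illustrative only and not part of this change.

// Illustrative sketch only: down-mix interleaved 16-bit 7.1 samples to stereo.
#include <cstdint>
#include <memory>
#include <vector>

#include "media/base/audio_bus.h"
#include "media/base/audio_sample_types.h"
#include "media/base/channel_layout.h"
#include "media/base/channel_mixer.h"

std::vector<int16_t> DownmixToStereoForIllustration(
    const std::vector<int16_t>& interleaved_7_1) {
  constexpr int kInputChannels = 8;   // 7.1
  constexpr int kOutputChannels = 2;  // stereo
  const int frames =
      static_cast<int>(interleaved_7_1.size()) / kInputChannels;

  // De-interleave the 16-bit samples into a planar, float-backed AudioBus.
  std::unique_ptr<media::AudioBus> input =
      media::AudioBus::Create(kInputChannels, frames);
  input->FromInterleaved<media::SignedInt16SampleTypeTraits>(
      interleaved_7_1.data(), frames);

  // Mix 7.1 down to stereo.
  media::ChannelMixer mixer(media::CHANNEL_LAYOUT_7_1,
                            media::CHANNEL_LAYOUT_STEREO);
  std::unique_ptr<media::AudioBus> output =
      media::AudioBus::Create(kOutputChannels, frames);
  mixer.Transform(input.get(), output.get());

  // Re-interleave back to 16-bit samples for the AudioPacket payload.
  std::vector<int16_t> interleaved_stereo(kOutputChannels * frames);
  output->ToInterleaved<media::SignedInt16SampleTypeTraits>(
      frames, interleaved_stereo.data());
  return interleaved_stereo;
}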
@@ -294,6 +294,7 @@ static_library("host") {
"//crypto",
"//device/power_save_blocker",
"//google_apis",
"//media",
"//remoting/base",
"//remoting/base:authorization",
"//remoting/host/security_key",
......
@@ -21,7 +21,6 @@
#include "remoting/host/win/default_audio_device_change_detector.h"
namespace {
const int kChannels = 2;
const int kBytesPerSample = 2;
const int kBitsPerSample = kBytesPerSample * 8;
// Conversion factor from 100ns to 1ms.
@@ -39,6 +38,7 @@ const int kMinTimerInterval = 30;
// Upper bound for the timer precision error, in milliseconds.
// Timers are supposed to be accurate to 20ms, so we use 30ms to be safe.
const int kMaxExpectedTimerLag = 30;
} // namespace
namespace remoting {
@@ -150,58 +150,50 @@ bool AudioCapturerWin::Initialize() {
return false;
}
// Set the wave format
switch (wave_format_ex_->wFormatTag) {
case WAVE_FORMAT_IEEE_FLOAT:
// Intentional fall-through.
case WAVE_FORMAT_PCM:
if (!AudioCapturer::IsValidSampleRate(wave_format_ex_->nSamplesPerSec)) {
LOG(ERROR) << "Host sampling rate is neither 44.1 kHz nor 48 kHz.";
return false;
}
sampling_rate_ = static_cast<AudioPacket::SamplingRate>(
wave_format_ex_->nSamplesPerSec);
wave_format_ex_->wFormatTag = WAVE_FORMAT_PCM;
wave_format_ex_->nChannels = kChannels;
wave_format_ex_->wBitsPerSample = kBitsPerSample;
wave_format_ex_->nBlockAlign = kChannels * kBytesPerSample;
wave_format_ex_->nAvgBytesPerSec =
sampling_rate_ * kChannels * kBytesPerSample;
break;
case WAVE_FORMAT_EXTENSIBLE: {
PWAVEFORMATEXTENSIBLE wave_format_extensible =
reinterpret_cast<WAVEFORMATEXTENSIBLE*>(
static_cast<WAVEFORMATEX*>(wave_format_ex_));
if (IsEqualGUID(KSDATAFORMAT_SUBTYPE_IEEE_FLOAT,
wave_format_extensible->SubFormat)) {
if (!AudioCapturer::IsValidSampleRate(
wave_format_extensible->Format.nSamplesPerSec)) {
LOG(ERROR) << "Host sampling rate is neither 44.1 kHz nor 48 kHz.";
return false;
}
sampling_rate_ = static_cast<AudioPacket::SamplingRate>(
wave_format_extensible->Format.nSamplesPerSec);
wave_format_extensible->SubFormat = KSDATAFORMAT_SUBTYPE_PCM;
wave_format_extensible->Samples.wValidBitsPerSample = kBitsPerSample;
wave_format_extensible->Format.nChannels = kChannels;
wave_format_extensible->Format.nSamplesPerSec = sampling_rate_;
wave_format_extensible->Format.wBitsPerSample = kBitsPerSample;
wave_format_extensible->Format.nBlockAlign =
kChannels * kBytesPerSample;
wave_format_extensible->Format.nAvgBytesPerSec =
sampling_rate_ * kChannels * kBytesPerSample;
} else {
LOG(ERROR) << "Failed to force 16-bit samples";
return false;
}
break;
}
default:
LOG(ERROR) << "Failed to force 16-bit PCM";
if (wave_format_ex_->wFormatTag != WAVE_FORMAT_IEEE_FLOAT &&
wave_format_ex_->wFormatTag != WAVE_FORMAT_PCM &&
wave_format_ex_->wFormatTag != WAVE_FORMAT_EXTENSIBLE) {
LOG(ERROR) << "Failed to force 16-bit PCM";
return false;
}
if (!AudioCapturer::IsValidSampleRate(wave_format_ex_->nSamplesPerSec)) {
LOG(ERROR) << "Host sampling rate is neither 44.1 kHz nor 48 kHz. "
<< wave_format_ex_->nSamplesPerSec;
return false;
}
// We support anything from mono up to 7.1 (8 channels). This check should be
// consistent with AudioPacket::Channels.
if (wave_format_ex_->nChannels > 8 || wave_format_ex_->nChannels <= 0) {
LOG(ERROR) << "Unsupported channels " << wave_format_ex_->nChannels;
return false;
}
sampling_rate_ = static_cast<AudioPacket::SamplingRate>(
wave_format_ex_->nSamplesPerSec);
wave_format_ex_->wBitsPerSample = kBitsPerSample;
wave_format_ex_->nBlockAlign = wave_format_ex_->nChannels * kBytesPerSample;
wave_format_ex_->nAvgBytesPerSec =
sampling_rate_ * wave_format_ex_->nBlockAlign;
if (wave_format_ex_->wFormatTag == WAVE_FORMAT_EXTENSIBLE) {
PWAVEFORMATEXTENSIBLE wave_format_extensible =
reinterpret_cast<WAVEFORMATEXTENSIBLE*>(
static_cast<WAVEFORMATEX*>(wave_format_ex_));
if (!IsEqualGUID(KSDATAFORMAT_SUBTYPE_IEEE_FLOAT,
wave_format_extensible->SubFormat) &&
!IsEqualGUID(KSDATAFORMAT_SUBTYPE_PCM,
wave_format_extensible->SubFormat)) {
LOG(ERROR) << "Failed to force 16-bit samples";
return false;
}
wave_format_extensible->SubFormat = KSDATAFORMAT_SUBTYPE_PCM;
wave_format_extensible->Samples.wValidBitsPerSample = kBitsPerSample;
} else {
wave_format_ex_->wFormatTag = WAVE_FORMAT_PCM;
}
// Initialize the IAudioClient.
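For a concrete, illustrative example of the format fields set in the hunk above: a 48 kHz device reporting 6 channels (5.1), with the 16-bit samples forced by this code, would end up with

  nBlockAlign     = nChannels * kBytesPerSample  = 6 * 2      = 12 bytes per frame
  nAvgBytesPerSec = nSamplesPerSec * nBlockAlign = 48000 * 12 = 576000 bytes per second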
@@ -233,7 +225,7 @@ bool AudioCapturerWin::Initialize() {
}
volume_filter_.ActivateBy(mm_device_.Get());
volume_filter_.Initialize(sampling_rate_, kChannels);
volume_filter_.Initialize(sampling_rate_, wave_format_ex_->nChannels);
return true;
}
@@ -280,7 +272,11 @@ void AudioCapturerWin::DoCapture() {
packet->set_encoding(AudioPacket::ENCODING_RAW);
packet->set_sampling_rate(sampling_rate_);
packet->set_bytes_per_sample(AudioPacket::BYTES_PER_SAMPLE_2);
packet->set_channels(AudioPacket::CHANNELS_STEREO);
// Only the channel count is taken into account for now; we should also
// consider dwChannelMask.
// TODO(zijiehe): Also support channel layouts.
packet->set_channels(static_cast<AudioPacket::Channels>(
wave_format_ex_->nChannels));
callback_.Run(std::move(packet));
}
......
@@ -23,6 +23,10 @@ namespace remoting {
class DefaultAudioDeviceChangeDetector;
// An AudioCapturer implementation for Windows using the Windows Audio Session
// API, a.k.a. WASAPI. It supports up to 8 channels, but treats all layouts as
// the most commonly used one; e.g. 3.1 and surround layouts will both be
// marked as the surround layout.
class AudioCapturerWin : public AudioCapturer {
public:
AudioCapturerWin();
......
@@ -47,6 +47,12 @@ message AudioPacket {
CHANNELS_INVALID = -1;
CHANNELS_MONO = 1;
CHANNELS_STEREO = 2;
CHANNELS_SURROUND = 3;
CHANNELS_4_0 = 4;
CHANNELS_4_1 = 5;
CHANNELS_5_1 = 6;
CHANNELS_6_1 = 7;
CHANNELS_7_1 = 8;
}
optional Channels channels = 6 [default = CHANNELS_INVALID];
......
@@ -5,6 +5,7 @@ include_rules = [
"+ppapi/c",
"+ppapi/cpp",
"+ppapi/utility",
"+media/base",
"+remoting/codec",
"+remoting/signaling",
"+third_party/boringssl",
......
@@ -4,19 +4,86 @@
#include "remoting/protocol/audio_pump.h"
#include <memory>
#include <utility>
#include "base/bind.h"
#include "base/location.h"
#include "base/logging.h"
#include "base/macros.h"
#include "base/memory/ptr_util.h"
#include "base/single_thread_task_runner.h"
#include "base/threading/thread_task_runner_handle.h"
#include "media/base/audio_bus.h"
#include "media/base/audio_sample_types.h"
#include "media/base/channel_layout.h"
#include "media/base/channel_mixer.h"
#include "remoting/codec/audio_encoder.h"
#include "remoting/proto/audio.pb.h"
#include "remoting/protocol/audio_source.h"
#include "remoting/protocol/audio_stub.h"
namespace {
int CalculateFrameCount(const remoting::AudioPacket& packet) {
return packet.data(0).size() / packet.channels() / packet.bytes_per_sample();
}
std::unique_ptr<media::AudioBus> AudioPacketToAudioBus(
const remoting::AudioPacket& packet) {
const int frame_count = CalculateFrameCount(packet);
DCHECK_GT(frame_count, 0);
std::unique_ptr<media::AudioBus> result =
media::AudioBus::Create(packet.channels(), frame_count);
result->FromInterleaved<media::SignedInt16SampleTypeTraits>(
reinterpret_cast<const int16_t*>(packet.data(0).data()), frame_count);
return result;
}
std::unique_ptr<remoting::AudioPacket> AudioBusToAudioPacket(
const media::AudioBus& packet) {
std::unique_ptr<remoting::AudioPacket> result =
base::MakeUnique<remoting::AudioPacket>();
result->add_data()->resize(
packet.channels() * packet.frames() * sizeof(int16_t));
packet.ToInterleaved<media::SignedInt16SampleTypeTraits>(
packet.frames(),
reinterpret_cast<int16_t*>(&(result->mutable_data(0)->at(0))));
result->set_encoding(remoting::AudioPacket::ENCODING_RAW);
result->set_channels(
static_cast<remoting::AudioPacket::Channels>(packet.channels()));
result->set_bytes_per_sample(remoting::AudioPacket::BYTES_PER_SAMPLE_2);
return result;
}
media::ChannelLayout RetrieveLayout(const remoting::AudioPacket& packet) {
// This switch should match AudioPacket::Channels enum in audio.proto.
switch (packet.channels()) {
case remoting::AudioPacket::CHANNELS_INVALID:
return media::CHANNEL_LAYOUT_UNSUPPORTED;
case remoting::AudioPacket::CHANNELS_MONO:
return media::CHANNEL_LAYOUT_MONO;
case remoting::AudioPacket::CHANNELS_STEREO:
return media::CHANNEL_LAYOUT_STEREO;
case remoting::AudioPacket::CHANNELS_SURROUND:
return media::CHANNEL_LAYOUT_SURROUND;
case remoting::AudioPacket::CHANNELS_4_0:
return media::CHANNEL_LAYOUT_4_0;
case remoting::AudioPacket::CHANNELS_4_1:
return media::CHANNEL_LAYOUT_4_1;
case remoting::AudioPacket::CHANNELS_5_1:
return media::CHANNEL_LAYOUT_5_1;
case remoting::AudioPacket::CHANNELS_6_1:
return media::CHANNEL_LAYOUT_6_1;
case remoting::AudioPacket::CHANNELS_7_1:
return media::CHANNEL_LAYOUT_7_1;
}
NOTREACHED() << "Invalid AudioPacket::Channels";
return media::CHANNEL_LAYOUT_UNSUPPORTED;
}
} // namespace
namespace remoting {
namespace protocol {
@@ -36,6 +103,8 @@ class AudioPump::Core {
void OnPacketSent(int size);
private:
std::unique_ptr<AudioPacket> Downmix(std::unique_ptr<AudioPacket> packet);
void EncodeAudioPacket(std::unique_ptr<AudioPacket> packet);
base::ThreadChecker thread_checker_;
@@ -53,6 +122,9 @@ class AudioPump::Core {
// yet.
int bytes_pending_;
std::unique_ptr<media::ChannelMixer> mixer_;
media::ChannelLayout mixer_input_layout_ = media::CHANNEL_LAYOUT_NONE;
DISALLOW_COPY_AND_ASSIGN(Core);
};
@@ -98,15 +170,21 @@ void AudioPump::Core::EncodeAudioPacket(std::unique_ptr<AudioPacket> packet) {
int max_buffered_bytes =
audio_encoder_->GetBitrate() * kMaxBufferedIntervalMs / 1000 / 8;
if (!enabled_ || bytes_pending_ > max_buffered_bytes)
if (!enabled_ || bytes_pending_ > max_buffered_bytes) {
return;
}
if (packet->channels() > AudioPacket::CHANNELS_STEREO) {
packet = Downmix(std::move(packet));
}
std::unique_ptr<AudioPacket> encoded_packet =
audio_encoder_->Encode(std::move(packet));
// The audio encoder returns a null audio packet if there's no audio to send.
if (!encoded_packet)
if (!encoded_packet) {
return;
}
int packet_size = encoded_packet->ByteSize();
bytes_pending_ += packet_size;
@@ -116,6 +194,35 @@ void AudioPump::Core::EncodeAudioPacket(std::unique_ptr<AudioPacket> packet) {
base::Passed(&encoded_packet), packet_size));
}
std::unique_ptr<AudioPacket> AudioPump::Core::Downmix(
std::unique_ptr<AudioPacket> packet) {
DCHECK(thread_checker_.CalledOnValidThread());
DCHECK(packet);
DCHECK_EQ(packet->data_size(), 1);
DCHECK_EQ(packet->bytes_per_sample(), AudioPacket::BYTES_PER_SAMPLE_2);
const media::ChannelLayout input_layout = RetrieveLayout(*packet);
DCHECK_NE(input_layout, media::CHANNEL_LAYOUT_UNSUPPORTED);
DCHECK_NE(input_layout, media::CHANNEL_LAYOUT_MONO);
DCHECK_NE(input_layout, media::CHANNEL_LAYOUT_STEREO);
if (!mixer_ || mixer_input_layout_ != input_layout) {
mixer_input_layout_ = input_layout;
mixer_ = base::MakeUnique<media::ChannelMixer>(
input_layout, media::CHANNEL_LAYOUT_STEREO);
}
std::unique_ptr<media::AudioBus> input = AudioPacketToAudioBus(*packet);
DCHECK(input);
std::unique_ptr<media::AudioBus> output =
media::AudioBus::Create(AudioPacket::CHANNELS_STEREO, input->frames());
mixer_->Transform(input.get(), output.get());
std::unique_ptr<AudioPacket> result = AudioBusToAudioPacket(*output);
result->set_sampling_rate(packet->sampling_rate());
return result;
}
AudioPump::AudioPump(
scoped_refptr<base::SingleThreadTaskRunner> audio_task_runner,
std::unique_ptr<AudioSource> audio_source,
......
@@ -29,8 +29,9 @@ class AudioSource;
// AudioPump is responsible for fetching audio data from the AudioCapturer
// and encoding it before passing it to the AudioStub for delivery to the
// client. Audio is captured and encoded on the audio thread and then passed to
// AudioStub on the network thread.
// client. Audio data will be downmixed to stereo if needed. Audio is captured
// and encoded on the audio thread and then passed to AudioStub on the network
// thread.
class AudioPump : public AudioStream {
public:
// The caller must ensure that the |audio_stub| is not destroyed until the
......
@@ -27,9 +27,13 @@ namespace protocol {
namespace {
// Creates a dummy packet with 1k data
std::unique_ptr<AudioPacket> MakeAudioPacket() {
std::unique_ptr<AudioPacket> MakeAudioPacket(int channel_count = 2) {
std::unique_ptr<AudioPacket> packet(new AudioPacket);
packet->add_data()->resize(1000);
packet->add_data()->resize(1024);
packet->set_encoding(AudioPacket::ENCODING_RAW);
packet->set_sampling_rate(AudioPacket::SAMPLING_RATE_44100);
packet->set_bytes_per_sample(AudioPacket::BYTES_PER_SAMPLE_2);
packet->set_channels(static_cast<AudioPacket::Channels>(channel_count));
return packet;
}
@@ -42,6 +46,11 @@ class FakeAudioEncoder : public AudioEncoder {
std::unique_ptr<AudioPacket> Encode(
std::unique_ptr<AudioPacket> packet) override {
EXPECT_TRUE(!!packet);
EXPECT_EQ(packet->encoding(), AudioPacket::ENCODING_RAW);
EXPECT_EQ(packet->sampling_rate(), AudioPacket::SAMPLING_RATE_44100);
EXPECT_EQ(packet->bytes_per_sample(), AudioPacket::BYTES_PER_SAMPLE_2);
EXPECT_LE(packet->channels(), AudioPacket::CHANNELS_STEREO);
return packet;
}
int GetBitrate() override { return 160000; }
@@ -127,5 +136,54 @@ TEST_F(AudioPumpTest, BufferSizeLimit) {
EXPECT_EQ(num_sent_packets + 1, sent_packets_.size());
}
TEST_F(AudioPumpTest, DownmixAudioPacket) {
// Run message loop to let the pump start the capturer.
base::RunLoop().RunUntilIdle();
ASSERT_TRUE(source_->callback());
// Generate several audio packets with different channel counts.
static const int kChannels[] = {
AudioPacket::CHANNELS_7_1,
AudioPacket::CHANNELS_6_1,
AudioPacket::CHANNELS_5_1,
AudioPacket::CHANNELS_STEREO,
AudioPacket::CHANNELS_MONO,
AudioPacket::CHANNELS_7_1,
AudioPacket::CHANNELS_7_1,
AudioPacket::CHANNELS_7_1,
AudioPacket::CHANNELS_7_1,
AudioPacket::CHANNELS_6_1,
AudioPacket::CHANNELS_6_1,
AudioPacket::CHANNELS_6_1,
AudioPacket::CHANNELS_6_1,
AudioPacket::CHANNELS_5_1,
AudioPacket::CHANNELS_5_1,
AudioPacket::CHANNELS_5_1,
AudioPacket::CHANNELS_5_1,
AudioPacket::CHANNELS_STEREO,
AudioPacket::CHANNELS_STEREO,
AudioPacket::CHANNELS_STEREO,
AudioPacket::CHANNELS_STEREO,
AudioPacket::CHANNELS_MONO,
AudioPacket::CHANNELS_MONO,
AudioPacket::CHANNELS_MONO,
AudioPacket::CHANNELS_MONO,
};
for (size_t i = 0; i < arraysize(kChannels); i++) {
source_->callback().Run(MakeAudioPacket(kChannels[i]));
// Run the message loop to let the pump process the audio packet and send it
// to the encoder.
base::RunLoop().RunUntilIdle();
// Call done closure to allow one more packet to be sent.
ASSERT_EQ(done_closures_.size(), 1U);
done_closures_.front().Run();
done_closures_.pop_back();
base::RunLoop().RunUntilIdle();
}
ASSERT_EQ(sent_packets_.size(), arraysize(kChannels));
}
} // namespace protocol
} // namespace remoting