This commit is contained in:
Bruno Windels 2022-02-25 16:54:00 +01:00 committed by Bruno Windels
parent 98e1dcf799
commit 179c7e74b5
8 changed files with 487 additions and 557 deletions

View file

@ -44,6 +44,8 @@ export class GroupCallHandler {
} }
// TODO: check and poll turn server credentials here
handleRoomState(room: Room, events: StateEvent[], log: ILogItem) { handleRoomState(room: Room, events: StateEvent[], log: ILogItem) {
// first update call events // first update call events
for (const event of events) { for (const event of events) {

View file

@ -0,0 +1,57 @@
/*
Copyright 2022 The Matrix.org Foundation C.I.C.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
import {StreamPurpose} from "../../platform/types/WebRTC";
import {Track, AudioTrack, TrackType} from "../../platform/types/MediaDevices";
import {SDPStreamMetadata} from "./callEventTypes";
export class LocalMedia {
constructor(
public readonly cameraTrack?: Track,
public readonly screenShareTrack?: Track,
public readonly microphoneTrack?: AudioTrack
) {}
withTracks(tracks: Track[]) {
const cameraTrack = tracks.find(t => t.type === TrackType.Camera) ?? this.cameraTrack;
const screenShareTrack = tracks.find(t => t.type === TrackType.ScreenShare) ?? this.screenShareTrack;
const microphoneTrack = tracks.find(t => t.type === TrackType.Microphone) ?? this.microphoneTrack;
if (cameraTrack && microphoneTrack && cameraTrack.streamId !== microphoneTrack.streamId) {
throw new Error("The camera and audio track should have the same stream id");
}
return new LocalMedia(cameraTrack, screenShareTrack, microphoneTrack as AudioTrack);
}
get tracks(): Track[] { return []; }
getSDPMetadata(): SDPStreamMetadata {
const metadata = {};
const userMediaTrack = this.microphoneTrack ?? this.cameraTrack;
if (userMediaTrack) {
metadata[userMediaTrack.streamId] = {
purpose: StreamPurpose.UserMedia,
audio_muted: this.microphoneTrack?.muted ?? false,
video_muted: this.cameraTrack?.muted ?? false,
};
}
if (this.screenShareTrack) {
metadata[this.screenShareTrack.streamId] = {
purpose: StreamPurpose.ScreenShare
};
}
return metadata;
}
}

View file

@ -15,7 +15,7 @@ limitations under the License.
*/ */
import {ObservableMap} from "../../observable/map/ObservableMap"; import {ObservableMap} from "../../observable/map/ObservableMap";
import {recursivelyAssign} from "../../utils/recursivelyAssign";
import {AsyncQueue} from "../../utils/AsyncQueue"; import {AsyncQueue} from "../../utils/AsyncQueue";
import type {Room} from "../room/Room"; import type {Room} from "../room/Room";
import type {StateEvent} from "../storage/types"; import type {StateEvent} from "../storage/types";
@ -23,8 +23,284 @@ import type {ILogItem} from "../../logging/types";
import {WebRTC, PeerConnection, PeerConnectionHandler, StreamPurpose} from "../../platform/types/WebRTC"; import {WebRTC, PeerConnection, PeerConnectionHandler, StreamPurpose} from "../../platform/types/WebRTC";
import {MediaDevices, Track, AudioTrack, TrackType} from "../../platform/types/MediaDevices"; import {MediaDevices, Track, AudioTrack, TrackType} from "../../platform/types/MediaDevices";
import type {LocalMedia} from "./LocalMedia";
import { randomString } from '../randomstring'; // when sending, we need to encrypt message with olm. I think the flow of room => roomEncryption => olmEncryption as we already
// do for sharing keys will be best as that already deals with room tracking.
/**
* Does WebRTC signalling for a single PeerConnection, and deals with WebRTC wrappers from platform
* */
/** Implements a call between two peers with the signalling state keeping, while still delegating the signalling message sending. Used by GroupCall.*/
class PeerCall {
private readonly peerConnection: PeerConnection;
private state = CallState.Fledgling;
private direction: CallDirection;
// A queue for candidates waiting to go out.
// We try to amalgamate candidates into a single candidate message where
// possible
private candidateSendQueue: Array<RTCIceCandidate> = [];
// If candidates arrive before we've picked an opponent (which, in particular,
// will happen if the opponent sends candidates eagerly before the user answers
// the call) we buffer them up here so we can then add the ones from the party we pick
private remoteCandidateBuffer? = new Map<string, RTCIceCandidate[]>();
private logger: any;
private remoteSDPStreamMetadata?: SDPStreamMetadata;
private responsePromiseChain?: Promise<void>;
private opponentPartyId?: PartyId;
constructor(
private readonly handler: PeerCallHandler,
private localMedia: LocalMedia,
webRTC: WebRTC
) {
const outer = this;
this.peerConnection = webRTC.createPeerConnection({
onIceConnectionStateChange(state: RTCIceConnectionState) {},
onLocalIceCandidate(candidate: RTCIceCandidate) {},
onIceGatheringStateChange(state: RTCIceGatheringState) {},
onRemoteTracksChanged(tracks: Track[]) {},
onDataChannelChanged(dataChannel: DataChannel | undefined) {},
onNegotiationNeeded() {
const promiseCreator = () => outer.handleNegotiation();
outer.responsePromiseChain = outer.responsePromiseChain?.then(promiseCreator) ?? promiseCreator();
},
getPurposeForStreamId(streamId: string): SDPStreamMetadataPurpose {
return outer.remoteSDPStreamMetadata?.[streamId]?.purpose ?? SDPStreamMetadataPurpose.Usermedia;
}
});
this.logger = {
debug(...args) { console.log.apply(console, ["WebRTC debug:", ...args])},
log(...args) { console.log.apply(console, ["WebRTC log:", ...args])},
warn(...args) { console.log.apply(console, ["WebRTC warn:", ...args])},
error(...args) { console.error.apply(console, ["WebRTC error:", ...args])},
}
}
handleIncomingSignallingMessage(message: SignallingMessage, partyId: PartyId) {
switch (message.type) {
case EventType.Invite:
this.handleInvite(message.content);
break;
case EventType.Answer:
this.handleAnswer(message.content);
break;
case EventType.Candidates:
this.handleRemoteIceCandidates(message.content);
break;
case EventType.Hangup:
}
}
async call(localMediaPromise: Promise<LocalMedia>): Promise<void> {
if (this.state !== CallState.Fledgling) {
return;
}
this.direction = CallDirection.Outbound;
this.setState(CallState.WaitLocalMedia);
try {
this.localMedia = await localMediaPromise;
} catch (err) {
this.setState(CallState.Ended);
return;
}
this.setState(CallState.CreateOffer);
// add the local tracks, and wait for onNegotiationNeeded and handleNegotiation to be called
for (const t of this.localMedia.tracks) {
this.peerConnection.addTrack(t);
}
await this.waitForState(CallState.InviteSent);
}
async answer() {
}
async hangup() {
}
async updateLocalMedia(localMediaPromise: Promise<LocalMedia>) {
const oldMedia = this.localMedia;
this.localMedia = await localMediaPromise;
const applyTrack = (selectTrack: (media: LocalMedia) => Track | undefined) => {
const oldTrack = selectTrack(oldMedia);
const newTrack = selectTrack(this.localMedia);
if (oldTrack && newTrack) {
this.peerConnection.replaceTrack(oldTrack, newTrack);
} else if (oldTrack) {
this.peerConnection.removeTrack(oldTrack);
} else if (newTrack) {
this.peerConnection.addTrack(newTrack);
}
};
// add the local tracks, and wait for onNegotiationNeeded and handleNegotiation to be called
applyTrack(m => m.microphoneTrack);
applyTrack(m => m.cameraTrack);
applyTrack(m => m.screenShareTrack);
}
// calls are serialized and deduplicated by negotiationQueue
private handleNegotiation = async (): Promise<void> => {
try {
await this.peerConnection.setLocalDescription();
} catch (err) {
this.logger.debug(`Call ${this.callId} Error setting local description!`, err);
this.terminate(CallParty.Local, CallErrorCode.SetLocalDescription, true);
return;
}
if (this.peerConnection.iceGatheringState === 'gathering') {
// Allow a short time for initial candidates to be gathered
await new Promise(resolve => setTimeout(resolve, 200));
}
if (this.state === CallState.Ended) {
return;
}
const offer = this.peerConnection.localDescription!;
// Get rid of any candidates waiting to be sent: they'll be included in the local
// description we just got and will send in the offer.
this.logger.info(`Call ${this.callId} Discarding ${
this.candidateSendQueue.length} candidates that will be sent in offer`);
this.candidateSendQueue = [];
// need to queue this
const content = {
offer,
[SDPStreamMetadataKey]: this.localMedia.getSDPMetadata(),
version: 1,
lifetime: CALL_TIMEOUT_MS
};
if (this.state === CallState.CreateOffer) {
await this.handler.sendSignallingMessage({type: EventType.Invite, content});
this.setState(CallState.InviteSent);
}
};
private async handleInvite(content: InviteContent, partyId: PartyId): Promise<void> {
if (this.state !== CallState.Fledgling || this.opponentPartyId !== undefined) {
// TODO: hangup or ignore?
return;
}
// we must set the party ID before await-ing on anything: the call event
// handler will start giving us more call events (eg. candidates) so if
// we haven't set the party ID, we'll ignore them.
this.opponentPartyId = partyId;
this.direction = CallDirection.Inbound;
const sdpStreamMetadata = content[SDPStreamMetadataKey];
if (sdpStreamMetadata) {
this.updateRemoteSDPStreamMetadata(sdpStreamMetadata);
} else {
this.logger.debug(`Call ${
this.callId} did not get any SDPStreamMetadata! Can not send/receive multiple streams`);
}
try {
await this.peerConnection.setRemoteDescription(content.offer);
await this.addBufferedIceCandidates();
} catch (e) {
this.logger.debug(`Call ${this.callId} failed to set remote description`, e);
this.terminate(CallParty.Local, CallErrorCode.SetRemoteDescription, false);
return;
}
// According to previous comments in this file, firefox at some point did not
// add streams until media started arriving on them. Testing latest firefox
// (81 at time of writing), this is no longer a problem, so let's do it the correct way.
if (this.peerConnection.remoteTracks.length === 0) {
this.logger.error(`Call ${this.callId} no remote stream or no tracks after setting remote description!`);
this.terminate(CallParty.Local, CallErrorCode.SetRemoteDescription, false);
return;
}
this.setState(CallState.Ringing);
setTimeout(() => {
if (this.state == CallState.Ringing) {
this.logger.debug(`Call ${this.callId} invite has expired. Hanging up.`);
this.hangupParty = CallParty.Remote; // effectively
this.setState(CallState.Ended);
this.stopAllMedia();
if (this.peerConnection.signalingState != 'closed') {
this.peerConnection.close();
}
this.emit(CallEvent.Hangup);
}
}, content.lifetime ?? CALL_TIMEOUT_MS /* - event.getLocalAge() */ );
}
private updateRemoteSDPStreamMetadata(metadata: SDPStreamMetadata): void {
this.remoteSDPStreamMetadata = recursivelyAssign(this.remoteSDPStreamMetadata || {}, metadata, true);
// will rerequest stream purpose for all tracks and set track.type accordingly
this.peerConnection.notifyStreamPurposeChanged();
for (const track of this.peerConnection.remoteTracks) {
const streamMetaData = this.remoteSDPStreamMetadata?.[track.streamId];
if (streamMetaData) {
if (track.type === TrackType.Microphone) {
track.setMuted(streamMetaData.audio_muted);
} else { // Camera or ScreenShare
track.setMuted(streamMetaData.video_muted);
}
}
}
}
private async addBufferedIceCandidates(): Promise<void> {
const bufferedCandidates = this.remoteCandidateBuffer!.get(this.opponentPartyId!);
if (bufferedCandidates) {
this.logger.info(`Call ${this.callId} Adding ${
bufferedCandidates.length} buffered candidates for opponent ${this.opponentPartyId}`);
await this.addIceCandidates(bufferedCandidates);
}
this.remoteCandidateBuffer = undefined;
}
private async addIceCandidates(candidates: RTCIceCandidate[]): Promise<void> {
for (const candidate of candidates) {
if (
(candidate.sdpMid === null || candidate.sdpMid === undefined) &&
(candidate.sdpMLineIndex === null || candidate.sdpMLineIndex === undefined)
) {
this.logger.debug(`Call ${this.callId} ignoring remote ICE candidate with no sdpMid or sdpMLineIndex`);
continue;
}
this.logger.debug(`Call ${this.callId} got remote ICE ${candidate.sdpMid} candidate: ${candidate.candidate}`);
try {
await this.peerConnection.addIceCandidate(candidate);
} catch (err) {
if (!this.ignoreOffer) {
this.logger.info(`Call ${this.callId} failed to add remote ICE candidate`, err);
}
}
}
}
private setState(state: CallState): void {
const oldState = this.state;
this.state = state;
this.handler.emitUpdate();
}
private waitForState(state: CallState): Promise<void> {
}
private async terminate(hangupParty: CallParty, hangupReason: CallErrorCode, shouldEmit: boolean): Promise<void> {
}
}
//import { randomString } from '../randomstring';
import { import {
MCallReplacesEvent, MCallReplacesEvent,
MCallAnswer, MCallAnswer,
@ -41,41 +317,13 @@ import {
MCallHangupReject, MCallHangupReject,
} from './callEventTypes'; } from './callEventTypes';
const GROUP_CALL_TYPE = "m.call";
const GROUP_CALL_MEMBER_TYPE = "m.call.member";
/**
* Fires whenever an error occurs when call.js encounters an issue with setting up the call.
* <p>
* The error given will have a code equal to either `MatrixCall.ERR_LOCAL_OFFER_FAILED` or
* `MatrixCall.ERR_NO_USER_MEDIA`. `ERR_LOCAL_OFFER_FAILED` is emitted when the local client
* fails to create an offer. `ERR_NO_USER_MEDIA` is emitted when the user has denied access
* to their audio/video hardware.
*
* @event module:webrtc/call~MatrixCall#"error"
* @param {Error} err The error raised by MatrixCall.
* @example
* matrixCall.on("error", function(err){
* console.error(err.code, err);
* });
*/
// null is used as a special value meaning that the we're in a legacy 1:1 call // null is used as a special value meaning that the we're in a legacy 1:1 call
// without MSC2746 that doesn't provide an id which device sent the message. // without MSC2746 that doesn't provide an id which device sent the message.
type PartyId = string | null; type PartyId = string | null;
interface TurnServer { export enum CallParty {
urls: Array<string>; Local = 'local',
username?: string; Remote = 'remote',
password?: string;
ttl?: number;
}
interface AssertedIdentity {
id: string;
displayName: string;
} }
export enum CallState { export enum CallState {
@ -90,21 +338,11 @@ export enum CallState {
Ended = 'ended', Ended = 'ended',
} }
export enum CallType {
Voice = 'voice',
Video = 'video',
}
export enum CallDirection { export enum CallDirection {
Inbound = 'inbound', Inbound = 'inbound',
Outbound = 'outbound', Outbound = 'outbound',
} }
export enum CallParty {
Local = 'local',
Remote = 'remote',
}
export enum EventType { export enum EventType {
Invite = "m.call.invite", Invite = "m.call.invite",
Candidates = "m.call.candidates", Candidates = "m.call.candidates",
@ -120,30 +358,6 @@ export enum EventType {
AssertedIdentityPrefix = "org.matrix.call.asserted_identity", AssertedIdentityPrefix = "org.matrix.call.asserted_identity",
} }
export enum CallEvent {
Hangup = 'hangup',
State = 'state',
Error = 'error',
Replaced = 'replaced',
// The value of isLocalOnHold() has changed
LocalHoldUnhold = 'local_hold_unhold',
// The value of isRemoteOnHold() has changed
RemoteHoldUnhold = 'remote_hold_unhold',
// backwards compat alias for LocalHoldUnhold: remove in a major version bump
HoldUnhold = 'hold_unhold',
// Feeds have changed
FeedsChanged = 'feeds_changed',
AssertedIdentityChanged = 'asserted_identity_changed',
LengthChanged = 'length_changed',
DataChannel = 'datachannel',
SendVoipEvent = "send_voip_event",
}
export enum CallErrorCode { export enum CallErrorCode {
/** The user chose to end the call */ /** The user chose to end the call */
UserHangup = 'user_hangup', UserHangup = 'user_hangup',
@ -254,477 +468,21 @@ export class CallError extends Error {
} }
} }
export function genCallID(): string { type InviteContent = {
return Date.now().toString() + randomString(16); offer: RTCSessionDescriptionInit,
} [SDPStreamMetadataKey]: SDPStreamMetadata,
version?: number,
enum CallSetupMessageType { lifetime?: number
Invite = "m.call.invite",
Answer = "m.call.answer",
Candidates = "m.call.candidates",
Hangup = "m.call.hangup",
}
const CALL_ID = "m.call_id";
const CALL_TERMINATED = "m.terminated";
class LocalMedia {
constructor(
public readonly cameraTrack?: Track,
public readonly screenShareTrack?: Track,
public readonly microphoneTrack?: AudioTrack
) {}
withTracks(tracks: Track[]) {
const cameraTrack = tracks.find(t => t.type === TrackType.Camera) ?? this.cameraTrack;
const screenShareTrack = tracks.find(t => t.type === TrackType.ScreenShare) ?? this.screenShareTrack;
const microphoneTrack = tracks.find(t => t.type === TrackType.Microphone) ?? this.microphoneTrack;
if (cameraTrack && microphoneTrack && cameraTrack.streamId !== microphoneTrack.streamId) {
throw new Error("The camera and audio track should have the same stream id");
}
return new LocalMedia(cameraTrack, screenShareTrack, microphoneTrack as AudioTrack);
}
get tracks(): Track[] { return []; }
getSDPMetadata(): any {
const metadata = {};
const userMediaTrack = this.microphoneTrack ?? this.cameraTrack;
if (userMediaTrack) {
metadata[userMediaTrack.streamId] = {
purpose: StreamPurpose.UserMedia,
audio_muted: this.microphoneTrack?.muted ?? false,
video_muted: this.cameraTrack?.muted ?? false,
};
}
if (this.screenShareTrack) {
metadata[this.screenShareTrack.streamId] = {
purpose: StreamPurpose.ScreenShare
};
}
return metadata;
}
} }
export type InviteMessage = { export type InviteMessage = {
type: EventType.Invite, type: EventType.Invite,
content: { content: InviteContent
version: number
}
} }
export type SignallingMessage = InviteMessage;
export interface PeerCallHandler { export interface PeerCallHandler {
emitUpdate(peerCall: PeerCall, params: any); emitUpdate(peerCall: PeerCall, params: any);
sendSignallingMessage(type: EventType, content: Record<string, any>); sendSignallingMessage(message: InviteMessage);
}
// when sending, we need to encrypt message with olm. I think the flow of room => roomEncryption => olmEncryption as we already
// do for sharing keys will be best as that already deals with room tracking.
/**
* Does WebRTC signalling for a single PeerConnection, and deals with WebRTC wrappers from platform
* */
/** Implements a call between two peers with the signalling state keeping, while still delegating the signalling message sending. Used by GroupCall.*/
class PeerCall implements PeerConnectionHandler {
private readonly peerConnection: PeerConnection;
public state = CallState.Fledgling;
public hangupParty: CallParty;
public hangupReason: string;
public direction: CallDirection;
public peerConn?: RTCPeerConnection;
// A queue for candidates waiting to go out.
// We try to amalgamate candidates into a single candidate message where
// possible
private candidateSendQueue: Array<RTCIceCandidate> = [];
private candidateSendTries = 0;
private sentEndOfCandidates = false;
private inviteOrAnswerSent = false;
private waitForLocalAVStream: boolean;
private opponentVersion: number | string;
// The party ID of the other side: undefined if we haven't chosen a partner
// yet, null if we have but they didn't send a party ID.
private opponentPartyId: PartyId;
private opponentCaps: CallCapabilities;
private inviteTimeout: number;
private iceDisconnectedTimeout: number;
// The logic of when & if a call is on hold is nontrivial and explained in is*OnHold
// This flag represents whether we want the other party to be on hold
private remoteOnHold = false;
// the stats for the call at the point it ended. We can't get these after we
// tear the call down, so we just grab a snapshot before we stop the call.
// The typescript definitions have this type as 'any' :(
private callStatsAtEnd: any[];
// Perfect negotiation state: https://www.w3.org/TR/webrtc/#perfect-negotiation-example
private makingOffer = false;
private ignoreOffer: boolean;
// If candidates arrive before we've picked an opponent (which, in particular,
// will happen if the opponent sends candidates eagerly before the user answers
// the call) we buffer them up here so we can then add the ones from the party we pick
private remoteCandidateBuffer: Map<PartyId, RTCIceCandidate[]>;
private remoteAssertedIdentity: AssertedIdentity;
private remoteSDPStreamMetadata?: SDPStreamMetadata;
private negotiationQueue: AsyncQueue<void, void>;
constructor(
private readonly handler: PeerCallHandler,
private localMedia: LocalMedia,
webRTC: WebRTC
) {
this.peerConnection = webRTC.createPeerConnection(this);
// TODO: should we use this to serialize all state changes?
this.negotiationQueue = new AsyncQueue(this.handleNegotiation, void);
}
// PeerConnectionHandler method
onIceConnectionStateChange(state: RTCIceConnectionState) {}
// PeerConnectionHandler method
onLocalIceCandidate(candidate: RTCIceCandidate) {}
// PeerConnectionHandler method
onIceGatheringStateChange(state: RTCIceGatheringState) {}
// PeerConnectionHandler method
onRemoteTracksChanged(tracks: Track[]) {}
// PeerConnectionHandler method
onDataChannelChanged(dataChannel: DataChannel | undefined) {}
// PeerConnectionHandler method
onNegotiationNeeded() {
// trigger handleNegotiation
this.negotiationQueue.push(void);
}
// calls are serialized and deduplicated by negotiationQueue
private handleNegotiation = async (): Promise<void> => {
const offer = await this.peerConnection.createOffer();
this.peerConnection.setLocalDescription(offer);
// need to queue this
const message = {
offer,
sdp_stream_metadata: this.localMedia.getSDPMetadata(),
version: 1
}
if (this.state === CallState.Fledgling) {
const sendPromise = this.handler.sendSignallingMessage(EventType.Invite, message);
this.setState(CallState.InviteSent);
} else {
await this.handler.sendSignallingMessage(EventType.Negotiate, message);
}
};
async sendInvite(localMediaPromise: Promise<LocalMedia>): Promise<void> {
if (this.state !== CallState.Fledgling) {
return;
}
this.setState(CallState.WaitLocalMedia);
this.localMedia = await localMediaPromise;
// add the local tracks, and wait for onNegotiationNeeded and handleNegotiation to be called
for (const t of this.localMedia.tracks) {
this.peerConnection.addTrack(t);
}
await this.waitForState(CallState.Ended, CallState.InviteSent);
}
async sendAnswer(localMediaPromise: Promise<LocalMedia>): Promise<void> {
if (this.callHasEnded()) return;
if (this.state !== CallState.Ringing) {
return;
}
this.setState(CallState.WaitLocalMedia);
this.waitForLocalAVStream = true;
this.localMedia = await localMediaPromise;
this.waitForLocalAVStream = false;
// enqueue the following
// add the local tracks, and wait for onNegotiationNeeded and handleNegotiation to be called
for (const t of this.localMedia.tracks) {
this.peerConnection.addTrack(t);
}
this.setState(CallState.CreateAnswer);
let myAnswer;
try {
myAnswer = await this.peerConn.createAnswer();
} catch (err) {
logger.debug("Failed to create answer: ", err);
this.terminate(CallParty.Local, CallErrorCode.CreateAnswer, true);
return;
}
try {
await this.peerConn.setLocalDescription(myAnswer);
this.setState(CallState.Connecting);
// Allow a short time for initial candidates to be gathered
await new Promise(resolve => {
setTimeout(resolve, 200);
});
// inlined sendAnswer
const answerContent = {
answer: {
sdp: this.peerConn.localDescription.sdp,
// type is now deprecated as of Matrix VoIP v1, but
// required to still be sent for backwards compat
type: this.peerConn.localDescription.type,
},
[SDPStreamMetadataKey]: this.getLocalSDPStreamMetadata(true),
} as MCallAnswer;
answerContent.capabilities = {
'm.call.transferee': this.client.supportsCallTransfer,
'm.call.dtmf': false,
};
// We have just taken the local description from the peerConn which will
// contain all the local candidates added so far, so we can discard any candidates
// we had queued up because they'll be in the answer.
logger.info(`Discarding ${this.candidateSendQueue.length} candidates that will be sent in answer`);
this.candidateSendQueue = [];
try {
await this.sendVoipEvent(EventType.CallAnswer, answerContent);
// If this isn't the first time we've tried to send the answer,
// we may have candidates queued up, so send them now.
this.inviteOrAnswerSent = true;
} catch (error) {
// We've failed to answer: back to the ringing state
this.setState(CallState.Ringing);
this.client.cancelPendingEvent(error.event);
let code = CallErrorCode.SendAnswer;
let message = "Failed to send answer";
if (error.name == 'UnknownDeviceError') {
code = CallErrorCode.UnknownDevices;
message = "Unknown devices present in the room";
}
this.emit(CallEvent.Error, new CallError(code, message, error));
throw error;
}
// error handler re-throws so this won't happen on error, but
// we don't want the same error handling on the candidate queue
this.sendCandidateQueue();
} catch (err) {
logger.debug("Error setting local description!", err);
this.terminate(CallParty.Local, CallErrorCode.SetLocalDescription, true);
return;
}
}
async updateLocalMedia(localMediaPromise: Promise<LocalMedia>) {
const oldMedia = this.localMedia;
this.localMedia = await localMediaPromise;
const applyTrack = (selectTrack: (media: LocalMedia) => Track | undefined) => {
const oldTrack = selectTrack(oldMedia);
const newTrack = selectTrack(this.localMedia);
if (oldTrack && newTrack) {
this.peerConnection.replaceTrack(oldTrack, newTrack);
} else if (oldTrack) {
this.peerConnection.removeTrack(oldTrack);
} else if (newTrack) {
this.peerConnection.addTrack(newTrack);
}
};
// add the local tracks, and wait for onNegotiationNeeded and handleNegotiation to be called
applyTrack(m => m.microphoneTrack);
applyTrack(m => m.cameraTrack);
applyTrack(m => m.screenShareTrack);
}
/**
* Replace this call with a new call, e.g. for glare resolution. Used by
* MatrixClient.
* @param {MatrixCall} newCall The new call.
*/
public replacedBy(newCall: MatrixCall): void {
if (this.state === CallState.WaitLocalMedia) {
logger.debug("Telling new call to wait for local media");
newCall.waitForLocalAVStream = true;
} else if ([CallState.CreateOffer, CallState.InviteSent].includes(this.state)) {
if (newCall.direction === CallDirection.Outbound) {
newCall.queueGotCallFeedsForAnswer([]);
} else {
logger.debug("Handing local stream to new call");
newCall.queueGotCallFeedsForAnswer(this.getLocalFeeds().map(feed => feed.clone()));
}
}
this.successor = newCall;
this.emit(CallEvent.Replaced, newCall);
this.hangup(CallErrorCode.Replaced, true);
}
/**
* Hangup a call.
* @param {string} reason The reason why the call is being hung up.
* @param {boolean} suppressEvent True to suppress emitting an event.
*/
public hangup(reason: CallErrorCode, suppressEvent: boolean): void {
if (this.callHasEnded()) return;
logger.debug("Ending call " + this.callId);
this.terminate(CallParty.Local, reason, !suppressEvent);
// We don't want to send hangup here if we didn't even get to sending an invite
if (this.state === CallState.WaitLocalMedia) return;
const content = {};
// Don't send UserHangup reason to older clients
if ((this.opponentVersion && this.opponentVersion >= 1) || reason !== CallErrorCode.UserHangup) {
content["reason"] = reason;
}
this.sendVoipEvent(EventType.CallHangup, content);
}
/**
* Reject a call
* This used to be done by calling hangup, but is a separate method and protocol
* event as of MSC2746.
*/
public reject(): void {
if (this.state !== CallState.Ringing) {
throw Error("Call must be in 'ringing' state to reject!");
}
if (this.opponentVersion < 1) {
logger.info(
`Opponent version is less than 1 (${this.opponentVersion}): sending hangup instead of reject`,
);
this.hangup(CallErrorCode.UserHangup, true);
return;
}
logger.debug("Rejecting call: " + this.callId);
this.terminate(CallParty.Local, CallErrorCode.UserHangup, true);
this.sendVoipEvent(EventType.CallReject, {});
}
// request the type of incoming track
getPurposeForStreamId(streamId: string): StreamPurpose {
// TODO: should we return a promise here for the case where the metadata hasn't arrived yet?
const metaData = this.remoteSDPStreamMetadata[streamId];
return metadata?.purpose as StreamPurpose ?? StreamPurpose.UserMedia;
}
private setState(state: CallState): void {
const oldState = this.state;
this.state = state;
this.handler.emitUpdate();
if (this.inviteDeferred) {
if (this.state === CallState.InviteSent) {
this.inviteDeferred.resolve();
}
}
}
handleIncomingSignallingMessage(type: CallSetupMessageType, content: Record<string, any>, partyId: PartyId) {
switch (type) {
case CallSetupMessageType.Invite:
case CallSetupMessageType.Answer:
this.handleAnswer(content);
break;
case CallSetupMessageType.Candidates:
this.handleRemoteIceCandidates(content);
break;
case CallSetupMessageType.Hangup:
}
}
private async handleAnswer(content: MCallAnswer, partyId: PartyId) {
// add buffered ice candidates to peerConnection
if (this.opponentPartyId !== undefined) {
return;
}
this.opponentPartyId = partyId;
const bufferedCandidates = this.remoteCandidateBuffer?.get(partyId);
if (bufferedCandidates) {
this.addIceCandidates(bufferedCandidates);
}
this.remoteCandidateBuffer = undefined;
this.setState(CallState.Connecting);
const sdpStreamMetadata = content[SDPStreamMetadataKey];
if (sdpStreamMetadata) {
this.updateRemoteSDPStreamMetadata(sdpStreamMetadata);
} else {
logger.warn("Did not get any SDPStreamMetadata! Can not send/receive multiple streams");
}
try {
await this.peerConnection.setRemoteDescription(content.answer);
} catch (e) {
logger.debug("Failed to set remote description", e);
this.terminate(CallParty.Local, CallErrorCode.SetRemoteDescription, false);
return;
}
// If the answer we selected has a party_id, send a select_answer event
// We do this after setting the remote description since otherwise we'd block
// call setup on it
if (this.opponentPartyId !== null) {
try {
await this.sendVoipEvent(EventType.CallSelectAnswer, {
selected_party_id: this.opponentPartyId,
});
} catch (err) {
// This isn't fatal, and will just mean that if another party has raced to answer
// the call, they won't know they got rejected, so we carry on & don't retry.
logger.warn("Failed to send select_answer event", err);
}
}
}
private handleRemoteIceCandidates(content: Record<string, any>) {
if (this.state === CallState.Ended) {
return;
}
const candidates = content.candidates;
if (!candidates) {
return;
}
if (this.opponentPartyId === undefined) {
if (!this.remoteCandidateBuffer) {
this.remoteCandidateBuffer = new Map();
}
const bufferedCandidates = this.remoteCandidateBuffer.get(fromPartyId) || [];
bufferedCandidates.push(...candidates);
this.remoteCandidateBuffer.set(fromPartyId, bufferedCandidates);
} else {
this.addIceCandidates(candidates);
}
}
private async addIceCandidates(candidates: RTCIceCandidate[]): Promise<void> {
for (const candidate of candidates) {
if (
(candidate.sdpMid === null || candidate.sdpMid === undefined) &&
(candidate.sdpMLineIndex === null || candidate.sdpMLineIndex === undefined)
) {
logger.debug("Ignoring remote ICE candidate with no sdpMid or sdpMLineIndex");
continue;
}
logger.debug(
"Call " + this.callId + " got remote ICE " + candidate.sdpMid + " candidate: " + candidate.candidate,
);
try {
await this.peerConnection.addIceCandidate(candidate);
} catch (err) {
if (!this.ignoreOffer) {
logger.info("Failed to add remote ICE candidate", err);
}
}
}
}
} }

View file

@ -1,6 +1,17 @@
- relevant MSCs next to spec:
- https://github.com/matrix-org/matrix-doc/pull/2746 Improved Signalling for 1:1 VoIP
- https://github.com/matrix-org/matrix-doc/pull/2747 Transferring VoIP Calls
- https://github.com/matrix-org/matrix-doc/pull/3077 Support for multi-stream VoIP
- https://github.com/matrix-org/matrix-doc/pull/3086 Asserted identity on VoIP calls
- https://github.com/matrix-org/matrix-doc/pull/3291 Muting in VoIP calls
- https://github.com/matrix-org/matrix-doc/pull/3401 Native Group VoIP Signalling
## TODO ## TODO
- PeerCall - PeerCall
- send invite - send invite
- implement terminate
- implement waitForState
- find out if we need to do something different when renegotation is triggered (a subsequent onnegotiationneeded event) whether - find out if we need to do something different when renegotation is triggered (a subsequent onnegotiationneeded event) whether
we sent the invite/offer or answer. e.g. do we always do createOffer/setLocalDescription and then send it over a matrix negotiation event? even if we before called createAnswer. we sent the invite/offer or answer. e.g. do we always do createOffer/setLocalDescription and then send it over a matrix negotiation event? even if we before called createAnswer.
- handle receiving offer and send anwser - handle receiving offer and send anwser
@ -36,12 +47,29 @@ we wait for other participants to add their user and device (in the sources)
for each (userid, deviceid) for each (userid, deviceid)
- if userId < ourUserId - if userId < ourUserId
- get local media
- we setup a peer connection - we setup a peer connection
- add local tracks
- we wait for negotation event to get sdp - we wait for negotation event to get sdp
- peerConn.createOffer
- peerConn.setLocalDescription
- we send an m.call.invite - we send an m.call.invite
- else - else
- wait for invite from other side - wait for invite from other side
on local ice candidate:
- if we haven't ... sent invite yet? or received answer? buffer candidate
- otherwise send candidate (without buffering?)
on incoming call:
- ring, offer to answer
answering incoming call
- get local media
- peerConn.setRemoteDescription
- add local tracks to peerConn
- peerConn.createAnswer()
- peerConn.setLocalDescription
in some cases, we will actually send the invite to all devices (e.g. SFU), so in some cases, we will actually send the invite to all devices (e.g. SFU), so
we probably still need to handle multiple anwsers? we probably still need to handle multiple anwsers?
@ -50,9 +78,6 @@ so we would send an invite to multiple devices and pick the one for which we
received the anwser first. between invite and anwser, we could already receive received the anwser first. between invite and anwser, we could already receive
ice candidates that we need to buffer. ice candidates that we need to buffer.
should a PeerCall only exist after we've received an answer?
Before that, we could have a PeerCallInvite
updating the metadata: updating the metadata:
@ -64,3 +89,38 @@ if just muting: use m.call.sdp_stream_metadata_changed
party identification party identification
- for 1:1 calls, we identify with a party_id - for 1:1 calls, we identify with a party_id
- for group calls, we identify with a device_id - for group calls, we identify with a device_id
## TODO
Build basic version of PeerCall
Build basic version of GroupCall
Make it possible to olm encrypt the messages
Do work needed for state events
- receiving (almost done?)
- sending
Expose call objects
Write view model
write view
## Calls questions\
- how do we handle glare between group calls (e.g. different state events with different call ids?)
- Split up DOM part into platform code? What abstractions to choose?
Does it make sense to come up with our own API very similar to DOM api?
- what code do we copy over vs what do we implement ourselves?
- MatrixCall: perhaps we can copy it over and modify it to our needs? Seems to have a lot of edge cases implemented.
- what is partyId about?
- CallFeed: I need better understand where it is used. It's basically a wrapper around a MediaStream with volume detection. Could it make sense to put this in platform for example?
- which parts of MSC2746 are still relevant for group calls?
- which parts of MSC2747 are still relevant for group calls? it seems mostly orthogonal?
- SOLVED: how does switching channels work? This was only enabled by MSC 2746
- you do getUserMedia()/getDisplayMedia() to get the stream(s)
- you call removeTrack/addTrack on the peerConnection
- you receive a negotiationneeded event
- you call createOffer
- you send m.call.negotiate
- SOLVED: wrt to MSC2746, is the screen share track and the audio track (and video track) part of the same stream? or do screen share tracks need to go in a different stream? it sounds incompatible with the MSC2746 requirement.
- SOLVED: how does muting work? MediaStreamTrack.enabled

View file

@ -15,11 +15,7 @@ limitations under the License.
*/ */
import {Track, TrackType} from "./MediaDevices"; import {Track, TrackType} from "./MediaDevices";
import {SDPStreamMetadataPurpose} from "../../matrix/calls/callEventTypes";
export enum StreamPurpose {
UserMedia = "m.usermedia",
ScreenShare = "m.screenshare"
}
export interface WebRTC { export interface WebRTC {
createPeerConnection(handler: PeerConnectionHandler): PeerConnection; createPeerConnection(handler: PeerConnectionHandler): PeerConnection;
@ -33,7 +29,7 @@ export interface PeerConnectionHandler {
onDataChannelChanged(dataChannel: DataChannel | undefined); onDataChannelChanged(dataChannel: DataChannel | undefined);
onNegotiationNeeded(); onNegotiationNeeded();
// request the type of incoming stream // request the type of incoming stream
getPurposeForStreamId(streamId: string): StreamPurpose; getPurposeForStreamId(streamId: string): SDPStreamMetadataPurpose;
} }
// does it make sense to wrap this? // does it make sense to wrap this?
export interface DataChannel { export interface DataChannel {
@ -42,8 +38,11 @@ export interface DataChannel {
} }
export interface PeerConnection { export interface PeerConnection {
get remoteTracks(): Track[] | undefined; notifyStreamPurposeChanged(): void;
get remoteTracks(): Track[];
get dataChannel(): DataChannel | undefined; get dataChannel(): DataChannel | undefined;
get iceGatheringState(): RTCIceGatheringState;
get localDescription(): RTCSessionDescription | undefined;
createOffer(): Promise<RTCSessionDescriptionInit>; createOffer(): Promise<RTCSessionDescriptionInit>;
createAnswer(): Promise<RTCSessionDescriptionInit>; createAnswer(): Promise<RTCSessionDescriptionInit>;
setLocalDescription(description?: RTCSessionDescriptionInit): Promise<void>; setLocalDescription(description?: RTCSessionDescriptionInit): Promise<void>;

View file

@ -90,9 +90,10 @@ export class TrackWrapper implements Track {
constructor( constructor(
public readonly track: MediaStreamTrack, public readonly track: MediaStreamTrack,
public readonly stream: MediaStream, public readonly stream: MediaStream,
public readonly type: TrackType private _type: TrackType,
) {} ) {}
get type(): TrackType { return this._type; }
get label(): string { return this.track.label; } get label(): string { return this.track.label; }
get id(): string { return this.track.id; } get id(): string { return this.track.id; }
get streamId(): string { return this.stream.id; } get streamId(): string { return this.stream.id; }
@ -102,6 +103,10 @@ export class TrackWrapper implements Track {
setMuted(muted: boolean): void { setMuted(muted: boolean): void {
this.track.enabled = !muted; this.track.enabled = !muted;
} }
setType(type: TrackType): void {
this._type = type;
}
} }
export class AudioTrackWrapper extends TrackWrapper { export class AudioTrackWrapper extends TrackWrapper {

View file

@ -16,7 +16,8 @@ limitations under the License.
import {TrackWrapper, wrapTrack} from "./MediaDevices"; import {TrackWrapper, wrapTrack} from "./MediaDevices";
import {Track, TrackType} from "../../types/MediaDevices"; import {Track, TrackType} from "../../types/MediaDevices";
import {WebRTC, PeerConnectionHandler, DataChannel, PeerConnection, StreamPurpose} from "../../types/WebRTC"; import {WebRTC, PeerConnectionHandler, DataChannel, PeerConnection} from "../../types/WebRTC";
import {SDPStreamMetadataPurpose} from "../../../matrix/calls/callEventTypes";
const POLLING_INTERVAL = 200; // ms const POLLING_INTERVAL = 200; // ms
export const SPEAKING_THRESHOLD = -60; // dB export const SPEAKING_THRESHOLD = -60; // dB
@ -25,8 +26,8 @@ const SPEAKING_SAMPLE_COUNT = 8; // samples
class DOMPeerConnection implements PeerConnection { class DOMPeerConnection implements PeerConnection {
private readonly peerConnection: RTCPeerConnection; private readonly peerConnection: RTCPeerConnection;
private readonly handler: PeerConnectionHandler; private readonly handler: PeerConnectionHandler;
private dataChannelWrapper?: DOMDataChannel; //private dataChannelWrapper?: DOMDataChannel;
private _remoteTracks: TrackWrapper[]; private _remoteTracks: TrackWrapper[] = [];
constructor(handler: PeerConnectionHandler, forceTURN: boolean, turnServers: RTCIceServer[], iceCandidatePoolSize = 0) { constructor(handler: PeerConnectionHandler, forceTURN: boolean, turnServers: RTCIceServer[], iceCandidatePoolSize = 0) {
this.handler = handler; this.handler = handler;
@ -39,7 +40,9 @@ class DOMPeerConnection implements PeerConnection {
} }
get remoteTracks(): Track[] { return this._remoteTracks; } get remoteTracks(): Track[] { return this._remoteTracks; }
get dataChannel(): DataChannel | undefined { return this.dataChannelWrapper; } get dataChannel(): DataChannel | undefined { return undefined; }
get iceGatheringState(): RTCIceGatheringState { return this.peerConnection.iceGatheringState; }
get localDescription(): RTCSessionDescription | undefined { return this.peerConnection.localDescription ?? undefined; }
createOffer(): Promise<RTCSessionDescriptionInit> { createOffer(): Promise<RTCSessionDescriptionInit> {
return this.peerConnection.createOffer(); return this.peerConnection.createOffer();
@ -97,6 +100,14 @@ class DOMPeerConnection implements PeerConnection {
} }
return false; return false;
} }
notifyStreamPurposeChanged(): void {
for (const track of this.remoteTracks) {
const wrapper = track as TrackWrapper;
wrapper.setType(this.getRemoteTrackType(wrapper.track, wrapper.streamId));
}
}
createDataChannel(): DataChannel { createDataChannel(): DataChannel {
return new DataChannel(this.peerConnection.createDataChannel()); return new DataChannel(this.peerConnection.createDataChannel());
} }
@ -173,20 +184,19 @@ class DOMPeerConnection implements PeerConnection {
// of the new tracks, filter the ones that we didn't already knew about // of the new tracks, filter the ones that we didn't already knew about
const addedTracks = updatedTracks.filter(ut => !this._remoteTracks.some(t => t.track.id === ut.track.id)); const addedTracks = updatedTracks.filter(ut => !this._remoteTracks.some(t => t.track.id === ut.track.id));
// wrap them // wrap them
const wrappedAddedTracks = addedTracks.map(t => this.wrapRemoteTrack(t.track, t.stream)); const wrappedAddedTracks = addedTracks.map(t => wrapTrack(t.track, t.stream, this.getRemoteTrackType(t.track, t.stream.id)));
// and concat the tracks for other streams with the added tracks // and concat the tracks for other streams with the added tracks
this._remoteTracks = withoutRemovedTracks.concat(...wrappedAddedTracks); this._remoteTracks = withoutRemovedTracks.concat(...wrappedAddedTracks);
this.handler.onRemoteTracksChanged(this.remoteTracks); this.handler.onRemoteTracksChanged(this.remoteTracks);
} }
private wrapRemoteTrack(track: MediaStreamTrack, stream: MediaStream): TrackWrapper {
let type: TrackType; private getRemoteTrackType(track: MediaStreamTrack, streamId: string): TrackType {
if (track.kind === "video") { if (track.kind === "video") {
const purpose = this.handler.getPurposeForStreamId(stream.id); const purpose = this.handler.getPurposeForStreamId(streamId);
type = purpose === StreamPurpose.UserMedia ? TrackType.Camera : TrackType.ScreenShare; return purpose === SDPStreamMetadataPurpose.Usermedia ? TrackType.Camera : TrackType.ScreenShare;
} else { } else {
type = TrackType.Microphone; return TrackType.Microphone;
} }
return wrapTrack(track, stream, type);
} }
/** /**

View file

@ -0,0 +1,39 @@
/*
Copyright 2015, 2016 OpenMarket Ltd
Copyright 2019 The Matrix.org Foundation C.I.C.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
/**
* This function is similar to Object.assign() but it assigns recursively and
* allows you to ignore nullish values from the source
*
* @param {Object} target
* @param {Object} source
* @returns the target object
*/
export function recursivelyAssign(target: Object, source: Object, ignoreNullish = false): any {
for (const [sourceKey, sourceValue] of Object.entries(source)) {
if (target[sourceKey] instanceof Object && sourceValue) {
recursivelyAssign(target[sourceKey], sourceValue);
continue;
}
if ((sourceValue !== null && sourceValue !== undefined) || !ignoreNullish) {
target[sourceKey] = sourceValue;
continue;
}
}
return target;
}