WIP6

2022-02-25 16:54:00 +01:00 · 2022-02-25 16:54:00 +01:00 · 179c7e74b5
commit 179c7e74b5
parent 98e1dcf799
8 changed files with 487 additions and 557 deletions
--- a/src/matrix/calls/CallHandler.ts
+++ b/src/matrix/calls/CallHandler.ts
@ -44,6 +44,8 @@ export class GroupCallHandler {
    }
    // TODO: check and poll turn server credentials here
    handleRoomState(room: Room, events: StateEvent[], log: ILogItem) {
        // first update call events
        for (const event of events) {
--- a/src/matrix/calls/LocalMedia.ts
+++ b/src/matrix/calls/LocalMedia.ts
@ -0,0 +1,57 @@
 /*
 Copyright 2022 The Matrix.org Foundation C.I.C.
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
 You may obtain a copy of the License at
    http://www.apache.org/licenses/LICENSE-2.0
 Unless required by applicable law or agreed to in writing, software
 distributed under the License is distributed on an "AS IS" BASIS,
 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
 */
 import {StreamPurpose} from "../../platform/types/WebRTC";
 import {Track, AudioTrack, TrackType} from "../../platform/types/MediaDevices";
 import {SDPStreamMetadata} from "./callEventTypes";
 export class LocalMedia {
    constructor(
        public readonly cameraTrack?: Track,
        public readonly screenShareTrack?: Track,
        public readonly microphoneTrack?: AudioTrack
    ) {}
    withTracks(tracks: Track[]) {
        const cameraTrack = tracks.find(t => t.type === TrackType.Camera) ?? this.cameraTrack;
        const screenShareTrack = tracks.find(t => t.type === TrackType.ScreenShare) ?? this.screenShareTrack;
        const microphoneTrack = tracks.find(t => t.type === TrackType.Microphone) ?? this.microphoneTrack;
        if (cameraTrack && microphoneTrack && cameraTrack.streamId !== microphoneTrack.streamId) {
            throw new Error("The camera and audio track should have the same stream id");
        }
        return new LocalMedia(cameraTrack, screenShareTrack, microphoneTrack as AudioTrack);
    }
    get tracks(): Track[] { return []; }
    getSDPMetadata(): SDPStreamMetadata {
        const metadata = {};
        const userMediaTrack = this.microphoneTrack ?? this.cameraTrack;
        if (userMediaTrack) {
            metadata[userMediaTrack.streamId] = {
                purpose: StreamPurpose.UserMedia,
                audio_muted: this.microphoneTrack?.muted ?? false,
                video_muted: this.cameraTrack?.muted ?? false,
            };
        }
        if (this.screenShareTrack) {
            metadata[this.screenShareTrack.streamId] = {
                purpose: StreamPurpose.ScreenShare
            };
        }
        return metadata;
    }
 }
--- a/src/matrix/calls/PeerCall.ts
+++ b/src/matrix/calls/PeerCall.ts
@ -15,7 +15,7 @@ limitations under the License.
 */
 import {ObservableMap} from "../../observable/map/ObservableMap";
-
+import {recursivelyAssign} from "../../utils/recursivelyAssign";
 import {AsyncQueue} from "../../utils/AsyncQueue";
 import type {Room} from "../room/Room";
 import type {StateEvent} from "../storage/types";
@ -23,8 +23,284 @@ import type {ILogItem} from "../../logging/types";
 import {WebRTC, PeerConnection, PeerConnectionHandler, StreamPurpose} from "../../platform/types/WebRTC";
 import {MediaDevices, Track, AudioTrack, TrackType} from "../../platform/types/MediaDevices";
 import type {LocalMedia} from "./LocalMedia";
-import { randomString } from '../randomstring';
+// when sending, we need to encrypt message with olm. I think the flow of room => roomEncryption => olmEncryption as we already
 // do for sharing keys will be best as that already deals with room tracking.
 /**
 * Does WebRTC signalling for a single PeerConnection, and deals with WebRTC wrappers from platform
 * */
 /** Implements a call between two peers with the signalling state keeping, while still delegating the signalling message sending. Used by GroupCall.*/
 class PeerCall {
    private readonly peerConnection: PeerConnection;
    private state = CallState.Fledgling;
    private direction: CallDirection;
    // A queue for candidates waiting to go out.
    // We try to amalgamate candidates into a single candidate message where
    // possible
    private candidateSendQueue: Array<RTCIceCandidate> = [];
    // If candidates arrive before we've picked an opponent (which, in particular,
    // will happen if the opponent sends candidates eagerly before the user answers
    // the call) we buffer them up here so we can then add the ones from the party we pick
    private remoteCandidateBuffer? = new Map<string, RTCIceCandidate[]>();
    private logger: any;
    private remoteSDPStreamMetadata?: SDPStreamMetadata;
    private responsePromiseChain?: Promise<void>;
    private opponentPartyId?: PartyId;
    constructor(
        private readonly handler: PeerCallHandler,
        private localMedia: LocalMedia,
        webRTC: WebRTC
    ) {
        const outer = this;
        this.peerConnection = webRTC.createPeerConnection({
            onIceConnectionStateChange(state: RTCIceConnectionState) {},
            onLocalIceCandidate(candidate: RTCIceCandidate) {},
            onIceGatheringStateChange(state: RTCIceGatheringState) {},
            onRemoteTracksChanged(tracks: Track[]) {},
            onDataChannelChanged(dataChannel: DataChannel | undefined) {},
            onNegotiationNeeded() {
                const promiseCreator = () => outer.handleNegotiation();
                outer.responsePromiseChain = outer.responsePromiseChain?.then(promiseCreator) ?? promiseCreator();
            },
            getPurposeForStreamId(streamId: string): SDPStreamMetadataPurpose {
                return outer.remoteSDPStreamMetadata?.[streamId]?.purpose ?? SDPStreamMetadataPurpose.Usermedia;
            }
        });
        this.logger = {
            debug(...args) { console.log.apply(console, ["WebRTC debug:", ...args])},
            log(...args) { console.log.apply(console, ["WebRTC log:", ...args])},
            warn(...args) { console.log.apply(console, ["WebRTC warn:", ...args])},
            error(...args) { console.error.apply(console, ["WebRTC error:", ...args])},
        }
    }
    handleIncomingSignallingMessage(message: SignallingMessage, partyId: PartyId) {
        switch (message.type) {
            case EventType.Invite:
                this.handleInvite(message.content);
                break;
            case EventType.Answer:
                this.handleAnswer(message.content);
                break;
            case EventType.Candidates:
                this.handleRemoteIceCandidates(message.content);
                break;
            case EventType.Hangup:
        }
    }
    async call(localMediaPromise: Promise<LocalMedia>): Promise<void> {
        if (this.state !== CallState.Fledgling) {
            return;
        }
        this.direction = CallDirection.Outbound;
        this.setState(CallState.WaitLocalMedia);
        try {
            this.localMedia = await localMediaPromise;
        } catch (err) {
            this.setState(CallState.Ended);
            return;
        }
        this.setState(CallState.CreateOffer);
        // add the local tracks, and wait for onNegotiationNeeded and handleNegotiation to be called
        for (const t of this.localMedia.tracks) {
            this.peerConnection.addTrack(t);
        }
        await this.waitForState(CallState.InviteSent);
    }
    async answer() {
    }
    async hangup() {
    }
    async updateLocalMedia(localMediaPromise: Promise<LocalMedia>) {
        const oldMedia = this.localMedia;
        this.localMedia = await localMediaPromise;
        const applyTrack = (selectTrack: (media: LocalMedia) => Track | undefined) => {
            const oldTrack = selectTrack(oldMedia);
            const newTrack = selectTrack(this.localMedia);
            if (oldTrack && newTrack) {
                this.peerConnection.replaceTrack(oldTrack, newTrack);
            } else if (oldTrack) {
                this.peerConnection.removeTrack(oldTrack);
            } else if (newTrack) {
                this.peerConnection.addTrack(newTrack);
            }
        };
        // add the local tracks, and wait for onNegotiationNeeded and handleNegotiation to be called
        applyTrack(m => m.microphoneTrack);
        applyTrack(m => m.cameraTrack);
        applyTrack(m => m.screenShareTrack);
    }
    // calls are serialized and deduplicated by negotiationQueue
    private handleNegotiation = async (): Promise<void> => {
        try {
            await this.peerConnection.setLocalDescription();
        } catch (err) {
            this.logger.debug(`Call ${this.callId} Error setting local description!`, err);
            this.terminate(CallParty.Local, CallErrorCode.SetLocalDescription, true);
            return;
        }
        if (this.peerConnection.iceGatheringState === 'gathering') {
            // Allow a short time for initial candidates to be gathered
            await new Promise(resolve => setTimeout(resolve, 200));
        }
        if (this.state === CallState.Ended) {
            return;
        }
        const offer = this.peerConnection.localDescription!;
        // Get rid of any candidates waiting to be sent: they'll be included in the local
        // description we just got and will send in the offer.
        this.logger.info(`Call ${this.callId} Discarding ${
            this.candidateSendQueue.length} candidates that will be sent in offer`);
        this.candidateSendQueue = [];
        // need to queue this
        const content = {
            offer,
            [SDPStreamMetadataKey]: this.localMedia.getSDPMetadata(),
            version: 1,
            lifetime: CALL_TIMEOUT_MS
        };
        if (this.state === CallState.CreateOffer) {
            await this.handler.sendSignallingMessage({type: EventType.Invite, content});
            this.setState(CallState.InviteSent);
        }
    };
    private async handleInvite(content: InviteContent, partyId: PartyId): Promise<void> {
        if (this.state !== CallState.Fledgling || this.opponentPartyId !== undefined) {
            // TODO: hangup or ignore?
            return;
        }
        // we must set the party ID before await-ing on anything: the call event
        // handler will start giving us more call events (eg. candidates) so if
        // we haven't set the party ID, we'll ignore them.
        this.opponentPartyId = partyId;
        this.direction = CallDirection.Inbound;
        const sdpStreamMetadata = content[SDPStreamMetadataKey];
        if (sdpStreamMetadata) {
            this.updateRemoteSDPStreamMetadata(sdpStreamMetadata);
        } else {
            this.logger.debug(`Call ${
                this.callId} did not get any SDPStreamMetadata! Can not send/receive multiple streams`);
        }
        try {
            await this.peerConnection.setRemoteDescription(content.offer);
            await this.addBufferedIceCandidates();
        } catch (e) {
            this.logger.debug(`Call ${this.callId} failed to set remote description`, e);
            this.terminate(CallParty.Local, CallErrorCode.SetRemoteDescription, false);
            return;
        }
        // According to previous comments in this file, firefox at some point did not
        // add streams until media started arriving on them. Testing latest firefox
        // (81 at time of writing), this is no longer a problem, so let's do it the correct way.
        if (this.peerConnection.remoteTracks.length === 0) {
            this.logger.error(`Call ${this.callId} no remote stream or no tracks after setting remote description!`);
            this.terminate(CallParty.Local, CallErrorCode.SetRemoteDescription, false);
            return;
        }
        this.setState(CallState.Ringing);
        setTimeout(() => {
            if (this.state == CallState.Ringing) {
                this.logger.debug(`Call ${this.callId} invite has expired. Hanging up.`);
                this.hangupParty = CallParty.Remote; // effectively
                this.setState(CallState.Ended);
                this.stopAllMedia();
                if (this.peerConnection.signalingState != 'closed') {
                    this.peerConnection.close();
                }
                this.emit(CallEvent.Hangup);
            }
        }, content.lifetime ?? CALL_TIMEOUT_MS /* - event.getLocalAge() */ );
    }
    private updateRemoteSDPStreamMetadata(metadata: SDPStreamMetadata): void {
        this.remoteSDPStreamMetadata = recursivelyAssign(this.remoteSDPStreamMetadata || {}, metadata, true);
        // will rerequest stream purpose for all tracks and set track.type accordingly
        this.peerConnection.notifyStreamPurposeChanged();
        for (const track of this.peerConnection.remoteTracks) {
            const streamMetaData = this.remoteSDPStreamMetadata?.[track.streamId];
            if (streamMetaData) {
                if (track.type === TrackType.Microphone) {
                    track.setMuted(streamMetaData.audio_muted);
                } else { // Camera or ScreenShare
                    track.setMuted(streamMetaData.video_muted);
                }
            }
        }
    }
    private async addBufferedIceCandidates(): Promise<void> {
        const bufferedCandidates = this.remoteCandidateBuffer!.get(this.opponentPartyId!);
        if (bufferedCandidates) {
            this.logger.info(`Call ${this.callId} Adding ${
                bufferedCandidates.length} buffered candidates for opponent ${this.opponentPartyId}`);
            await this.addIceCandidates(bufferedCandidates);
        }
        this.remoteCandidateBuffer = undefined;
    }
    private async addIceCandidates(candidates: RTCIceCandidate[]): Promise<void> {
        for (const candidate of candidates) {
            if (
                (candidate.sdpMid === null || candidate.sdpMid === undefined) &&
                (candidate.sdpMLineIndex === null || candidate.sdpMLineIndex === undefined)
            ) {
                this.logger.debug(`Call ${this.callId} ignoring remote ICE candidate with no sdpMid or sdpMLineIndex`);
                continue;
            }
            this.logger.debug(`Call ${this.callId} got remote ICE ${candidate.sdpMid} candidate: ${candidate.candidate}`);
            try {
                await this.peerConnection.addIceCandidate(candidate);
            } catch (err) {
                if (!this.ignoreOffer) {
                    this.logger.info(`Call ${this.callId} failed to add remote ICE candidate`, err);
                }
            }
        }
    }
    private setState(state: CallState): void {
        const oldState = this.state;
        this.state = state;
        this.handler.emitUpdate();
    }
    private waitForState(state: CallState): Promise<void> {
    }
    private async terminate(hangupParty: CallParty, hangupReason: CallErrorCode, shouldEmit: boolean): Promise<void> {
    }
 }
 //import { randomString } from '../randomstring';
 import {
    MCallReplacesEvent,
    MCallAnswer,
@ -41,41 +317,13 @@ import {
    MCallHangupReject,
 } from './callEventTypes';
 const GROUP_CALL_TYPE = "m.call";
 const GROUP_CALL_MEMBER_TYPE = "m.call.member";
 /**
 * Fires whenever an error occurs when call.js encounters an issue with setting up the call.
 * <p>
 * The error given will have a code equal to either `MatrixCall.ERR_LOCAL_OFFER_FAILED` or
 * `MatrixCall.ERR_NO_USER_MEDIA`. `ERR_LOCAL_OFFER_FAILED` is emitted when the local client
 * fails to create an offer. `ERR_NO_USER_MEDIA` is emitted when the user has denied access
 * to their audio/video hardware.
 *
 * @event module:webrtc/call~MatrixCall#"error"
 * @param {Error} err The error raised by MatrixCall.
 * @example
 * matrixCall.on("error", function(err){
 *   console.error(err.code, err);
 * });
 */
 // null is used as a special value meaning that the we're in a legacy 1:1 call
 // without MSC2746 that doesn't provide an id which device sent the message.
 type PartyId = string | null;
-interface TurnServer {
+export enum CallParty {
-    urls: Array<string>;
+    Local = 'local',
-    username?: string;
+    Remote = 'remote',
    password?: string;
    ttl?: number;
 }
 interface AssertedIdentity {
    id: string;
    displayName: string;
 }
 export enum CallState {
@ -90,21 +338,11 @@ export enum CallState {
    Ended = 'ended',
 }
 export enum CallType {
    Voice = 'voice',
    Video = 'video',
 }
 export enum CallDirection {
    Inbound = 'inbound',
    Outbound = 'outbound',
 }
 export enum CallParty {
    Local = 'local',
    Remote = 'remote',
 }
 export enum EventType {
    Invite = "m.call.invite",
    Candidates = "m.call.candidates",
@ -120,30 +358,6 @@ export enum EventType {
    AssertedIdentityPrefix = "org.matrix.call.asserted_identity",
 }
 export enum CallEvent {
    Hangup = 'hangup',
    State = 'state',
    Error = 'error',
    Replaced = 'replaced',
    // The value of isLocalOnHold() has changed
    LocalHoldUnhold = 'local_hold_unhold',
    // The value of isRemoteOnHold() has changed
    RemoteHoldUnhold = 'remote_hold_unhold',
    // backwards compat alias for LocalHoldUnhold: remove in a major version bump
    HoldUnhold = 'hold_unhold',
    // Feeds have changed
    FeedsChanged = 'feeds_changed',
    AssertedIdentityChanged = 'asserted_identity_changed',
    LengthChanged = 'length_changed',
    DataChannel = 'datachannel',
    SendVoipEvent = "send_voip_event",
 }
 export enum CallErrorCode {
    /** The user chose to end the call */
    UserHangup = 'user_hangup',
@ -254,477 +468,21 @@ export class CallError extends Error {
    }
 }
-export function genCallID(): string {
+type InviteContent = {
-    return Date.now().toString() + randomString(16);
+    offer: RTCSessionDescriptionInit,
-}
+    [SDPStreamMetadataKey]: SDPStreamMetadata,
-
+    version?: number,
-enum CallSetupMessageType {
+    lifetime?: number
    Invite = "m.call.invite",
    Answer = "m.call.answer",
    Candidates = "m.call.candidates",
    Hangup = "m.call.hangup",
 }
 const CALL_ID = "m.call_id";
 const CALL_TERMINATED = "m.terminated";
 class LocalMedia {
    constructor(
        public readonly cameraTrack?: Track,
        public readonly screenShareTrack?: Track,
        public readonly microphoneTrack?: AudioTrack
    ) {}
    withTracks(tracks: Track[]) {
        const cameraTrack = tracks.find(t => t.type === TrackType.Camera) ?? this.cameraTrack;
        const screenShareTrack = tracks.find(t => t.type === TrackType.ScreenShare) ?? this.screenShareTrack;
        const microphoneTrack = tracks.find(t => t.type === TrackType.Microphone) ?? this.microphoneTrack;
        if (cameraTrack && microphoneTrack && cameraTrack.streamId !== microphoneTrack.streamId) {
            throw new Error("The camera and audio track should have the same stream id");
        }
        return new LocalMedia(cameraTrack, screenShareTrack, microphoneTrack as AudioTrack);
    }
    get tracks(): Track[] { return []; }
    getSDPMetadata(): any {
        const metadata = {};
        const userMediaTrack = this.microphoneTrack ?? this.cameraTrack;
        if (userMediaTrack) {
            metadata[userMediaTrack.streamId] = {
                purpose: StreamPurpose.UserMedia,
                audio_muted: this.microphoneTrack?.muted ?? false,
                video_muted: this.cameraTrack?.muted ?? false,
            };
        }
        if (this.screenShareTrack) {
            metadata[this.screenShareTrack.streamId] = {
                purpose: StreamPurpose.ScreenShare
            };
        }
        return metadata;
    }
 }
 export type InviteMessage = {
    type: EventType.Invite,
-    content: {
+    content: InviteContent
        version: number
    }
 }
 export type SignallingMessage = InviteMessage;
 export interface PeerCallHandler {
    emitUpdate(peerCall: PeerCall, params: any);
-    sendSignallingMessage(type: EventType, content: Record<string, any>);
+    sendSignallingMessage(message: InviteMessage);
 }
 // when sending, we need to encrypt message with olm. I think the flow of room => roomEncryption => olmEncryption as we already
 // do for sharing keys will be best as that already deals with room tracking.
 /**
 * Does WebRTC signalling for a single PeerConnection, and deals with WebRTC wrappers from platform
 * */
 /** Implements a call between two peers with the signalling state keeping, while still delegating the signalling message sending. Used by GroupCall.*/
 class PeerCall implements PeerConnectionHandler {
    private readonly peerConnection: PeerConnection;
    public state = CallState.Fledgling;
    public hangupParty: CallParty;
    public hangupReason: string;
    public direction: CallDirection;
    public peerConn?: RTCPeerConnection;
    // A queue for candidates waiting to go out.
    // We try to amalgamate candidates into a single candidate message where
    // possible
    private candidateSendQueue: Array<RTCIceCandidate> = [];
    private candidateSendTries = 0;
    private sentEndOfCandidates = false;
    private inviteOrAnswerSent = false;
    private waitForLocalAVStream: boolean;
    private opponentVersion: number | string;
    // The party ID of the other side: undefined if we haven't chosen a partner
    // yet, null if we have but they didn't send a party ID.
    private opponentPartyId: PartyId;
    private opponentCaps: CallCapabilities;
    private inviteTimeout: number;
    private iceDisconnectedTimeout: number;
    // The logic of when & if a call is on hold is nontrivial and explained in is*OnHold
    // This flag represents whether we want the other party to be on hold
    private remoteOnHold = false;
    // the stats for the call at the point it ended. We can't get these after we
    // tear the call down, so we just grab a snapshot before we stop the call.
    // The typescript definitions have this type as 'any' :(
    private callStatsAtEnd: any[];
    // Perfect negotiation state: https://www.w3.org/TR/webrtc/#perfect-negotiation-example
    private makingOffer = false;
    private ignoreOffer: boolean;
    // If candidates arrive before we've picked an opponent (which, in particular,
    // will happen if the opponent sends candidates eagerly before the user answers
    // the call) we buffer them up here so we can then add the ones from the party we pick
    private remoteCandidateBuffer: Map<PartyId, RTCIceCandidate[]>;
    private remoteAssertedIdentity: AssertedIdentity;
    private remoteSDPStreamMetadata?: SDPStreamMetadata;
    private negotiationQueue: AsyncQueue<void, void>;
    constructor(
        private readonly handler: PeerCallHandler,
        private localMedia: LocalMedia,
        webRTC: WebRTC
    ) {
        this.peerConnection = webRTC.createPeerConnection(this);
        // TODO: should we use this to serialize all state changes?
        this.negotiationQueue = new AsyncQueue(this.handleNegotiation, void);
    }
    // PeerConnectionHandler method
    onIceConnectionStateChange(state: RTCIceConnectionState) {}
    // PeerConnectionHandler method
    onLocalIceCandidate(candidate: RTCIceCandidate) {}
    // PeerConnectionHandler method
    onIceGatheringStateChange(state: RTCIceGatheringState) {}
    // PeerConnectionHandler method
    onRemoteTracksChanged(tracks: Track[]) {}
    // PeerConnectionHandler method
    onDataChannelChanged(dataChannel: DataChannel | undefined) {}
    // PeerConnectionHandler method
    onNegotiationNeeded() {
        // trigger handleNegotiation
        this.negotiationQueue.push(void);   
    }
    // calls are serialized and deduplicated by negotiationQueue
    private handleNegotiation = async (): Promise<void> => {
        const offer = await this.peerConnection.createOffer();
        this.peerConnection.setLocalDescription(offer);
        // need to queue this
        const message = {
            offer,
            sdp_stream_metadata: this.localMedia.getSDPMetadata(),
            version: 1
        }
        if (this.state === CallState.Fledgling) {
            const sendPromise = this.handler.sendSignallingMessage(EventType.Invite, message);
            this.setState(CallState.InviteSent);
        } else {
            await this.handler.sendSignallingMessage(EventType.Negotiate, message);
        }
    };
    async sendInvite(localMediaPromise: Promise<LocalMedia>): Promise<void> {
        if (this.state !== CallState.Fledgling) {
            return;
        }
        this.setState(CallState.WaitLocalMedia);
        this.localMedia = await localMediaPromise;
        // add the local tracks, and wait for onNegotiationNeeded and handleNegotiation to be called
        for (const t of this.localMedia.tracks) {
            this.peerConnection.addTrack(t);
        }
        await this.waitForState(CallState.Ended, CallState.InviteSent);
    }
    async sendAnswer(localMediaPromise: Promise<LocalMedia>): Promise<void> {
        if (this.callHasEnded()) return;
        if (this.state !== CallState.Ringing) {
            return;
        }
        this.setState(CallState.WaitLocalMedia);
        this.waitForLocalAVStream = true;
        this.localMedia = await localMediaPromise;
        this.waitForLocalAVStream = false;
        // enqueue the following
        // add the local tracks, and wait for onNegotiationNeeded and handleNegotiation to be called
        for (const t of this.localMedia.tracks) {
            this.peerConnection.addTrack(t);
        }
        this.setState(CallState.CreateAnswer);
        let myAnswer;
        try {
            myAnswer = await this.peerConn.createAnswer();
        } catch (err) {
            logger.debug("Failed to create answer: ", err);
            this.terminate(CallParty.Local, CallErrorCode.CreateAnswer, true);
            return;
        }
        try {
            await this.peerConn.setLocalDescription(myAnswer);
            this.setState(CallState.Connecting);
            // Allow a short time for initial candidates to be gathered
            await new Promise(resolve => {
                setTimeout(resolve, 200);
            });
            // inlined sendAnswer
            const answerContent = {
                answer: {
                    sdp: this.peerConn.localDescription.sdp,
                    // type is now deprecated as of Matrix VoIP v1, but
                    // required to still be sent for backwards compat
                    type: this.peerConn.localDescription.type,
                },
                [SDPStreamMetadataKey]: this.getLocalSDPStreamMetadata(true),
            } as MCallAnswer;
            answerContent.capabilities = {
                'm.call.transferee': this.client.supportsCallTransfer,
                'm.call.dtmf': false,
            };
            // We have just taken the local description from the peerConn which will
            // contain all the local candidates added so far, so we can discard any candidates
            // we had queued up because they'll be in the answer.
            logger.info(`Discarding ${this.candidateSendQueue.length} candidates that will be sent in answer`);
            this.candidateSendQueue = [];
            try {
                await this.sendVoipEvent(EventType.CallAnswer, answerContent);
                // If this isn't the first time we've tried to send the answer,
                // we may have candidates queued up, so send them now.
                this.inviteOrAnswerSent = true;
            } catch (error) {
                // We've failed to answer: back to the ringing state
                this.setState(CallState.Ringing);
                this.client.cancelPendingEvent(error.event);
                let code = CallErrorCode.SendAnswer;
                let message = "Failed to send answer";
                if (error.name == 'UnknownDeviceError') {
                    code = CallErrorCode.UnknownDevices;
                    message = "Unknown devices present in the room";
                }
                this.emit(CallEvent.Error, new CallError(code, message, error));
                throw error;
            }
            // error handler re-throws so this won't happen on error, but
            // we don't want the same error handling on the candidate queue
            this.sendCandidateQueue();
        } catch (err) {
            logger.debug("Error setting local description!", err);
            this.terminate(CallParty.Local, CallErrorCode.SetLocalDescription, true);
            return;
        }
    }
    async updateLocalMedia(localMediaPromise: Promise<LocalMedia>) {
        const oldMedia = this.localMedia;
        this.localMedia = await localMediaPromise;
        const applyTrack = (selectTrack: (media: LocalMedia) => Track | undefined) => {
            const oldTrack = selectTrack(oldMedia);
            const newTrack = selectTrack(this.localMedia);
            if (oldTrack && newTrack) {
                this.peerConnection.replaceTrack(oldTrack, newTrack);
            } else if (oldTrack) {
                this.peerConnection.removeTrack(oldTrack);
            } else if (newTrack) {
                this.peerConnection.addTrack(newTrack);
            }
        };
        // add the local tracks, and wait for onNegotiationNeeded and handleNegotiation to be called
        applyTrack(m => m.microphoneTrack);
        applyTrack(m => m.cameraTrack);
        applyTrack(m => m.screenShareTrack);
    }
    /**
     * Replace this call with a new call, e.g. for glare resolution. Used by
     * MatrixClient.
     * @param {MatrixCall} newCall The new call.
     */
    public replacedBy(newCall: MatrixCall): void {
        if (this.state === CallState.WaitLocalMedia) {
            logger.debug("Telling new call to wait for local media");
            newCall.waitForLocalAVStream = true;
        } else if ([CallState.CreateOffer, CallState.InviteSent].includes(this.state)) {
            if (newCall.direction === CallDirection.Outbound) {
                newCall.queueGotCallFeedsForAnswer([]);
            } else {
                logger.debug("Handing local stream to new call");
                newCall.queueGotCallFeedsForAnswer(this.getLocalFeeds().map(feed => feed.clone()));
            }
        }
        this.successor = newCall;
        this.emit(CallEvent.Replaced, newCall);
        this.hangup(CallErrorCode.Replaced, true);
    }
    /**
     * Hangup a call.
     * @param {string} reason The reason why the call is being hung up.
     * @param {boolean} suppressEvent True to suppress emitting an event.
     */
    public hangup(reason: CallErrorCode, suppressEvent: boolean): void {
        if (this.callHasEnded()) return;
        logger.debug("Ending call " + this.callId);
        this.terminate(CallParty.Local, reason, !suppressEvent);
        // We don't want to send hangup here if we didn't even get to sending an invite
        if (this.state === CallState.WaitLocalMedia) return;
        const content = {};
        // Don't send UserHangup reason to older clients
        if ((this.opponentVersion && this.opponentVersion >= 1) || reason !== CallErrorCode.UserHangup) {
            content["reason"] = reason;
        }
        this.sendVoipEvent(EventType.CallHangup, content);
    }
    /**
     * Reject a call
     * This used to be done by calling hangup, but is a separate method and protocol
     * event as of MSC2746.
     */
    public reject(): void {
        if (this.state !== CallState.Ringing) {
            throw Error("Call must be in 'ringing' state to reject!");
        }
        if (this.opponentVersion < 1) {
            logger.info(
                `Opponent version is less than 1 (${this.opponentVersion}): sending hangup instead of reject`,
            );
            this.hangup(CallErrorCode.UserHangup, true);
            return;
        }
        logger.debug("Rejecting call: " + this.callId);
        this.terminate(CallParty.Local, CallErrorCode.UserHangup, true);
        this.sendVoipEvent(EventType.CallReject, {});
    }
    // request the type of incoming track
    getPurposeForStreamId(streamId: string): StreamPurpose {
        // TODO: should we return a promise here for the case where the metadata hasn't arrived yet?
        const metaData = this.remoteSDPStreamMetadata[streamId];
        return metadata?.purpose as StreamPurpose ?? StreamPurpose.UserMedia;
    }
    private setState(state: CallState): void {
        const oldState = this.state;
        this.state = state;
        this.handler.emitUpdate();
        if (this.inviteDeferred) {
            if (this.state === CallState.InviteSent) {
                this.inviteDeferred.resolve();   
            }
        }
    }
    handleIncomingSignallingMessage(type: CallSetupMessageType, content: Record<string, any>, partyId: PartyId) {
        switch (type) {
            case CallSetupMessageType.Invite:
            case CallSetupMessageType.Answer:
                this.handleAnswer(content);
                break;
            case CallSetupMessageType.Candidates:
                this.handleRemoteIceCandidates(content);
                break;
            case CallSetupMessageType.Hangup:
        }
    }
    private async handleAnswer(content: MCallAnswer, partyId: PartyId) {
        // add buffered ice candidates to peerConnection
        if (this.opponentPartyId !== undefined) {
            return;
        }
        this.opponentPartyId = partyId;
        const bufferedCandidates = this.remoteCandidateBuffer?.get(partyId);
        if (bufferedCandidates) {
            this.addIceCandidates(bufferedCandidates);
        }
        this.remoteCandidateBuffer = undefined;
        this.setState(CallState.Connecting);
        const sdpStreamMetadata = content[SDPStreamMetadataKey];
        if (sdpStreamMetadata) {
            this.updateRemoteSDPStreamMetadata(sdpStreamMetadata);
        } else {
            logger.warn("Did not get any SDPStreamMetadata! Can not send/receive multiple streams");
        }
        try {
            await this.peerConnection.setRemoteDescription(content.answer);
        } catch (e) {
            logger.debug("Failed to set remote description", e);
            this.terminate(CallParty.Local, CallErrorCode.SetRemoteDescription, false);
            return;
        }
        // If the answer we selected has a party_id, send a select_answer event
        // We do this after setting the remote description since otherwise we'd block
        // call setup on it
        if (this.opponentPartyId !== null) {
            try {
                await this.sendVoipEvent(EventType.CallSelectAnswer, {
                    selected_party_id: this.opponentPartyId,
                });
            } catch (err) {
                // This isn't fatal, and will just mean that if another party has raced to answer
                // the call, they won't know they got rejected, so we carry on & don't retry.
                logger.warn("Failed to send select_answer event", err);
            }
        }
    }
    private handleRemoteIceCandidates(content: Record<string, any>) {
        if (this.state === CallState.Ended) {
            return;
        }
        const candidates = content.candidates;
        if (!candidates) {
            return;
        }
        if (this.opponentPartyId === undefined) {
            if (!this.remoteCandidateBuffer) {
                this.remoteCandidateBuffer = new Map();
            }
            const bufferedCandidates = this.remoteCandidateBuffer.get(fromPartyId) || [];
            bufferedCandidates.push(...candidates);
            this.remoteCandidateBuffer.set(fromPartyId, bufferedCandidates);
        } else {
            this.addIceCandidates(candidates);
        }
    }
    private async addIceCandidates(candidates: RTCIceCandidate[]): Promise<void> {
        for (const candidate of candidates) {
            if (
                (candidate.sdpMid === null || candidate.sdpMid === undefined) &&
                (candidate.sdpMLineIndex === null || candidate.sdpMLineIndex === undefined)
            ) {
                logger.debug("Ignoring remote ICE candidate with no sdpMid or sdpMLineIndex");
                continue;
            }
            logger.debug(
                "Call " + this.callId + " got remote ICE " + candidate.sdpMid + " candidate: " + candidate.candidate,
            );
            try {
                await this.peerConnection.addIceCandidate(candidate);
            } catch (err) {
                if (!this.ignoreOffer) {
                    logger.info("Failed to add remote ICE candidate", err);
                }
            }
        }
    }
 }
--- a/src/matrix/calls/TODO.md
+++ b/src/matrix/calls/TODO.md
@ -1,6 +1,17 @@
 - relevant MSCs next to spec:
  - https://github.com/matrix-org/matrix-doc/pull/2746 Improved Signalling for 1:1 VoIP
  - https://github.com/matrix-org/matrix-doc/pull/2747 Transferring VoIP Calls
  - https://github.com/matrix-org/matrix-doc/pull/3077 Support for multi-stream VoIP
  - https://github.com/matrix-org/matrix-doc/pull/3086 Asserted identity on VoIP calls
  - https://github.com/matrix-org/matrix-doc/pull/3291 Muting in VoIP calls
  - https://github.com/matrix-org/matrix-doc/pull/3401 Native Group VoIP Signalling
 ## TODO
 - PeerCall
    - send invite
    - implement terminate
    - implement waitForState
        - find out if we need to do something different when renegotation is triggered (a subsequent onnegotiationneeded event) whether
          we sent the invite/offer or answer. e.g. do we always do createOffer/setLocalDescription and then send it over a matrix negotiation event? even if we before called createAnswer.
    - handle receiving offer and send anwser
@ -36,12 +47,29 @@ we wait for other participants to add their user and device (in the sources)
 for each (userid, deviceid)
    - if userId < ourUserId
        - get local media
        - we setup a peer connection
        - add local tracks
        - we wait for negotation event to get sdp
        - peerConn.createOffer
        - peerConn.setLocalDescription
        - we send an m.call.invite 
    - else
        - wait for invite from other side
 on local ice candidate:
    - if we haven't ... sent invite yet? or received answer? buffer candidate
    - otherwise send candidate (without buffering?)
 on incoming call:
    - ring, offer to answer
 answering incoming call
    - get local media
    - peerConn.setRemoteDescription
    - add local tracks to peerConn
    - peerConn.createAnswer()
    - peerConn.setLocalDescription
 in some cases, we will actually send the invite to all devices (e.g. SFU), so
 we probably still need to handle multiple anwsers?
@ -50,9 +78,6 @@ so we would send an invite to multiple devices and pick the one for which we
 received the anwser first. between invite and anwser, we could already receive
 ice candidates that we need to buffer.
 should a PeerCall only exist after we've received an answer?
 Before that, we could have a PeerCallInvite
 updating the metadata:
@ -64,3 +89,38 @@ if just muting: use m.call.sdp_stream_metadata_changed
 party identification
 - for 1:1 calls, we identify with a party_id
 - for group calls, we identify with a device_id
 ## TODO
 Build basic version of PeerCall
 Build basic version of GroupCall
 Make it possible to olm encrypt the messages
 Do work needed for state events
    - receiving (almost done?)
    - sending
 Expose call objects
 Write view model
 write view
 ## Calls questions\
 - how do we handle glare between group calls (e.g. different state events with different call ids?)
 - Split up DOM part into platform code? What abstractions to choose?
   Does it make sense to come up with our own API very similar to DOM api?
 - what code do we copy over vs what do we implement ourselves?
    - MatrixCall: perhaps we can copy it over and modify it to our needs? Seems to have a lot of edge cases implemented.
        - what is partyId about?
    - CallFeed: I need better understand where it is used. It's basically a wrapper around a MediaStream with volume detection. Could it make sense to put this in platform for example?
 - which parts of MSC2746 are still relevant for group calls?
 - which parts of MSC2747 are still relevant for group calls? it seems mostly orthogonal?
 - SOLVED: how does switching channels work? This was only enabled by MSC 2746
    - you do getUserMedia()/getDisplayMedia() to get the stream(s)
    - you call removeTrack/addTrack on the peerConnection
    - you receive a negotiationneeded event
    - you call createOffer
    - you send m.call.negotiate
 - SOLVED: wrt to MSC2746, is the screen share track and the audio track (and video track) part of the same stream? or do screen share tracks need to go in a different stream? it sounds incompatible with the MSC2746 requirement.
 - SOLVED: how does muting work? MediaStreamTrack.enabled
--- a/src/platform/types/WebRTC.ts
+++ b/src/platform/types/WebRTC.ts
@ -15,11 +15,7 @@ limitations under the License.
 */
 import {Track, TrackType} from "./MediaDevices";
-
+import {SDPStreamMetadataPurpose} from "../../matrix/calls/callEventTypes";
 export enum StreamPurpose {
    UserMedia = "m.usermedia",
    ScreenShare = "m.screenshare"
 }
 export interface WebRTC {
    createPeerConnection(handler: PeerConnectionHandler): PeerConnection;
@ -33,7 +29,7 @@ export interface PeerConnectionHandler {
    onDataChannelChanged(dataChannel: DataChannel | undefined);
    onNegotiationNeeded();
    // request the type of incoming stream
-    getPurposeForStreamId(streamId: string): StreamPurpose;
+    getPurposeForStreamId(streamId: string): SDPStreamMetadataPurpose;
 }
 // does it make sense to wrap this?
 export interface DataChannel {
@ -42,8 +38,11 @@ export interface DataChannel {
 }
 export interface PeerConnection {
-    get remoteTracks(): Track[] | undefined;
+    notifyStreamPurposeChanged(): void;
    get remoteTracks(): Track[];
    get dataChannel(): DataChannel | undefined;
    get iceGatheringState(): RTCIceGatheringState;
    get localDescription(): RTCSessionDescription | undefined;
    createOffer(): Promise<RTCSessionDescriptionInit>;
    createAnswer(): Promise<RTCSessionDescriptionInit>;
    setLocalDescription(description?: RTCSessionDescriptionInit): Promise<void>;
--- a/src/platform/web/dom/MediaDevices.ts
+++ b/src/platform/web/dom/MediaDevices.ts
@ -90,9 +90,10 @@ export class TrackWrapper implements Track {
    constructor(
        public readonly track: MediaStreamTrack,
        public readonly stream: MediaStream,
-        public readonly type: TrackType
+        private _type: TrackType,
    ) {}
    get type(): TrackType { return this._type; }
    get label(): string { return this.track.label; }
    get id(): string { return this.track.id; }
    get streamId(): string { return this.stream.id; }
@ -102,6 +103,10 @@ export class TrackWrapper implements Track {
    setMuted(muted: boolean): void {
        this.track.enabled = !muted;
    }
    setType(type: TrackType): void {
        this._type = type;
    }
 }
 export class AudioTrackWrapper extends TrackWrapper {
--- a/src/platform/web/dom/WebRTC.ts
+++ b/src/platform/web/dom/WebRTC.ts
@ -16,7 +16,8 @@ limitations under the License.
 import {TrackWrapper, wrapTrack} from "./MediaDevices";
 import {Track, TrackType} from "../../types/MediaDevices";
-import {WebRTC, PeerConnectionHandler, DataChannel, PeerConnection, StreamPurpose} from "../../types/WebRTC";
+import {WebRTC, PeerConnectionHandler, DataChannel, PeerConnection} from "../../types/WebRTC";
 import {SDPStreamMetadataPurpose} from "../../../matrix/calls/callEventTypes";
 const POLLING_INTERVAL = 200; // ms
 export const SPEAKING_THRESHOLD = -60; // dB
@ -25,8 +26,8 @@ const SPEAKING_SAMPLE_COUNT = 8; // samples
 class DOMPeerConnection implements PeerConnection {
    private readonly peerConnection: RTCPeerConnection;
    private readonly handler: PeerConnectionHandler;
-    private dataChannelWrapper?: DOMDataChannel;
+    //private dataChannelWrapper?: DOMDataChannel;
-    private _remoteTracks: TrackWrapper[];
+    private _remoteTracks: TrackWrapper[] = [];
    constructor(handler: PeerConnectionHandler, forceTURN: boolean, turnServers: RTCIceServer[], iceCandidatePoolSize = 0) {
        this.handler = handler;
@ -39,7 +40,9 @@ class DOMPeerConnection implements PeerConnection {
    }
    get remoteTracks(): Track[] { return this._remoteTracks; }
-    get dataChannel(): DataChannel | undefined { return this.dataChannelWrapper; }
+    get dataChannel(): DataChannel | undefined { return undefined; }
    get iceGatheringState(): RTCIceGatheringState { return this.peerConnection.iceGatheringState; }
    get localDescription(): RTCSessionDescription | undefined { return this.peerConnection.localDescription ?? undefined; }
    createOffer(): Promise<RTCSessionDescriptionInit> {
        return this.peerConnection.createOffer();
@ -97,6 +100,14 @@ class DOMPeerConnection implements PeerConnection {
        }
        return false;
    }
    notifyStreamPurposeChanged(): void {
        for (const track of this.remoteTracks) {
            const wrapper = track as TrackWrapper;
            wrapper.setType(this.getRemoteTrackType(wrapper.track, wrapper.streamId));
        }
    }
    createDataChannel(): DataChannel {
        return new DataChannel(this.peerConnection.createDataChannel());
    }
@ -173,20 +184,19 @@ class DOMPeerConnection implements PeerConnection {
        // of the new tracks, filter the ones that we didn't already knew about
        const addedTracks = updatedTracks.filter(ut => !this._remoteTracks.some(t => t.track.id === ut.track.id));
        // wrap them
-        const wrappedAddedTracks = addedTracks.map(t => this.wrapRemoteTrack(t.track, t.stream));
+        const wrappedAddedTracks = addedTracks.map(t => wrapTrack(t.track, t.stream, this.getRemoteTrackType(t.track, t.stream.id)));
        // and concat the tracks for other streams with the added tracks
        this._remoteTracks = withoutRemovedTracks.concat(...wrappedAddedTracks);
        this.handler.onRemoteTracksChanged(this.remoteTracks);
    }
-    private wrapRemoteTrack(track: MediaStreamTrack, stream: MediaStream): TrackWrapper {
+
-        let type: TrackType;
+    private getRemoteTrackType(track: MediaStreamTrack, streamId: string): TrackType {
        if (track.kind === "video") {
-            const purpose = this.handler.getPurposeForStreamId(stream.id);
+            const purpose = this.handler.getPurposeForStreamId(streamId);
-            type = purpose === StreamPurpose.UserMedia ? TrackType.Camera : TrackType.ScreenShare;
+            return purpose === SDPStreamMetadataPurpose.Usermedia ? TrackType.Camera : TrackType.ScreenShare;
        } else {
-            type = TrackType.Microphone;
+            return TrackType.Microphone;
        }
        return wrapTrack(track, stream, type);
    }
    /**
--- a/src/utils/recursivelyAssign.ts
+++ b/src/utils/recursivelyAssign.ts
@ -0,0 +1,39 @@
 /*
 Copyright 2015, 2016 OpenMarket Ltd
 Copyright 2019 The Matrix.org Foundation C.I.C.
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
 You may obtain a copy of the License at
    http://www.apache.org/licenses/LICENSE-2.0
 Unless required by applicable law or agreed to in writing, software
 distributed under the License is distributed on an "AS IS" BASIS,
 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
 */
 /**
 * This function is similar to Object.assign() but it assigns recursively and
 * allows you to ignore nullish values from the source
 *
 * @param {Object} target
 * @param {Object} source
 * @returns the target object
 */
 export function recursivelyAssign(target: Object, source: Object, ignoreNullish = false): any {
    for (const [sourceKey, sourceValue] of Object.entries(source)) {
        if (target[sourceKey] instanceof Object && sourceValue) {
            recursivelyAssign(target[sourceKey], sourceValue);
            continue;
        }
        if ((sourceValue !== null && sourceValue !== undefined) || !ignoreNullish) {
            target[sourceKey] = sourceValue;
            continue;
        }
    }
    return target;
 }