From a859db05bba6fcbe2aaa73347944a91661423842 Mon Sep 17 00:00:00 2001 From: Nutomic Date: Mon, 4 Mar 2024 12:47:42 +0100 Subject: [PATCH] Add security checks when fetching objects (#95) * Add security checks when fetching objects * static * as ref * update comment * fix header * Update comment --- src/error.rs | 14 ++++++++++---- src/fetch/mod.rs | 41 ++++++++++++++++++++++++++++++++++++----- src/fetch/webfinger.rs | 6 +++++- src/lib.rs | 19 ++++++++++++------- 4 files changed, 63 insertions(+), 17 deletions(-) diff --git a/src/error.rs b/src/error.rs index ba9248a..1383ddb 100644 --- a/src/error.rs +++ b/src/error.rs @@ -1,13 +1,11 @@ //! Error messages returned by this library -use std::string::FromUtf8Error; - +use crate::fetch::webfinger::WebFingerError; use http_signature_normalization_reqwest::SignError; use openssl::error::ErrorStack; +use std::string::FromUtf8Error; use url::Url; -use crate::fetch::webfinger::WebFingerError; - /// Error messages returned by this library #[derive(thiserror::Error, Debug)] pub enum Error { @@ -62,6 +60,14 @@ pub enum Error { /// Signing errors #[error(transparent)] SignError(#[from] SignError), + /// Attempted to fetch object which doesn't have valid ActivityPub Content-Type + #[error( + "Attempted to fetch object from {0} which doesn't have valid ActivityPub Content-Type" + )] + FetchInvalidContentType(Url), + /// Attempted to fetch object but the response's id field doesn't match + #[error("Attempted to fetch object from {0} but the response's id field doesn't match")] + FetchWrongId(Url), /// Other generic errors #[error("{0}")] Other(String), diff --git a/src/fetch/mod.rs b/src/fetch/mod.rs index 474d946..71e1ce5 100644 --- a/src/fetch/mod.rs +++ b/src/fetch/mod.rs @@ -5,12 +5,13 @@ use crate::{ config::Data, error::{Error, Error::ParseFetchedObject}, + extract_id, http_signatures::sign_request, reqwest_shim::ResponseExt, FEDERATION_CONTENT_TYPE, }; use bytes::Bytes; -use http::StatusCode; +use http::{HeaderValue, StatusCode}; use serde::de::DeserializeOwned; use std::sync::atomic::Ordering; use tracing::info; @@ -29,6 +30,8 @@ pub struct FetchObjectResponse { pub object: Kind, /// Contains the final URL (different from request URL in case of redirect) pub url: Url, + content_type: Option, + object_id: Option, } /// Fetch a remote object over HTTP and convert to `Kind`. @@ -42,12 +45,32 @@ pub struct FetchObjectResponse { /// [Error::RequestLimit]. This prevents denial of service attacks where an attack triggers /// infinite, recursive fetching of data. /// -/// The `Accept` header will be set to the content of [`FEDERATION_CONTENT_TYPE`]. +/// The `Accept` header will be set to the content of [`FEDERATION_CONTENT_TYPE`]. When parsing the +/// response it ensures that it has a valid `Content-Type` header as defined by ActivityPub, to +/// prevent security vulnerabilities like [this one](https://github.com/mastodon/mastodon/security/advisories/GHSA-jhrq-qvrm-qr36). +/// Additionally it checks that the `id` field is identical to the fetch URL (after redirects). pub async fn fetch_object_http( url: &Url, data: &Data, ) -> Result, Error> { - fetch_object_http_with_accept(url, data, FEDERATION_CONTENT_TYPE).await + static CONTENT_TYPE: HeaderValue = HeaderValue::from_static(FEDERATION_CONTENT_TYPE); + static ALT_CONTENT_TYPE: HeaderValue = HeaderValue::from_static( + r#"application/ld+json; profile="https://www.w3.org/ns/activitystreams""#, + ); + let res = fetch_object_http_with_accept(url, data, &CONTENT_TYPE).await?; + + // Ensure correct content-type to prevent vulnerabilities. + if res.content_type.as_ref() != Some(&CONTENT_TYPE) + && res.content_type.as_ref() != Some(&ALT_CONTENT_TYPE) + { + return Err(Error::FetchInvalidContentType(res.url)); + } + + // Ensure id field matches final url + if res.object_id.as_ref() != Some(&res.url) { + return Err(Error::FetchWrongId(res.url)); + } + Ok(res) } /// Fetch a remote object over HTTP and convert to `Kind`. This function works exactly as @@ -55,7 +78,7 @@ pub async fn fetch_object_http( async fn fetch_object_http_with_accept( url: &Url, data: &Data, - content_type: &str, + content_type: &HeaderValue, ) -> Result, Error> { let config = &data.config; // dont fetch local objects this way @@ -93,9 +116,17 @@ async fn fetch_object_http_with_accept( } let url = res.url().clone(); + let content_type = res.headers().get("Content-Type").cloned(); let text = res.bytes_limited().await?; + let object_id = extract_id(&text).ok(); + match serde_json::from_slice(&text) { - Ok(object) => Ok(FetchObjectResponse { object, url }), + Ok(object) => Ok(FetchObjectResponse { + object, + url, + content_type, + object_id, + }), Err(e) => Err(ParseFetchedObject( e, url, diff --git a/src/fetch/webfinger.rs b/src/fetch/webfinger.rs index 68b110d..7da2fdf 100644 --- a/src/fetch/webfinger.rs +++ b/src/fetch/webfinger.rs @@ -5,6 +5,7 @@ use crate::{ traits::{Actor, Object}, FEDERATION_CONTENT_TYPE, }; +use http::HeaderValue; use itertools::Itertools; use once_cell::sync::Lazy; use regex::Regex; @@ -33,6 +34,9 @@ impl WebFingerError { } } +/// The content-type for webfinger responses. +pub static WEBFINGER_CONTENT_TYPE: HeaderValue = HeaderValue::from_static("application/jrd+json"); + /// Takes an identifier of the form `name@example.com`, and returns an object of `Kind`. /// /// For this the identifier is first resolved via webfinger protocol to an Activitypub ID. This ID @@ -58,7 +62,7 @@ where let res: Webfinger = fetch_object_http_with_accept( &Url::parse(&fetch_url).map_err(Error::UrlParse)?, data, - "application/jrd+json", + &WEBFINGER_CONTENT_TYPE, ) .await? .object; diff --git a/src/lib.rs b/src/lib.rs index f482aa0..c025d29 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -35,7 +35,7 @@ use serde::{de::DeserializeOwned, Deserialize}; use url::Url; /// Mime type for Activitypub data, used for `Accept` and `Content-Type` HTTP headers -pub static FEDERATION_CONTENT_TYPE: &str = "application/activity+json"; +pub const FEDERATION_CONTENT_TYPE: &str = "application/activity+json"; /// Deserialize incoming inbox activity to the given type, perform basic /// validation and extract the actor. @@ -53,12 +53,8 @@ where { let activity: Activity = serde_json::from_slice(body).map_err(|e| { // Attempt to include activity id in error message - #[derive(Deserialize)] - struct Id { - id: Url, - } - let id = serde_json::from_slice::(body).ok(); - Error::ParseReceivedActivity(e, id.map(|i| i.id)) + let id = extract_id(body).ok(); + Error::ParseReceivedActivity(e, id) })?; data.config.verify_url_and_domain(&activity).await?; let actor = ObjectId::::from(activity.actor().clone()) @@ -66,3 +62,12 @@ where .await?; Ok((activity, actor)) } + +/// Attempt to parse id field from serialized json +fn extract_id(data: &[u8]) -> serde_json::Result { + #[derive(Deserialize)] + struct Id { + id: Url, + } + Ok(serde_json::from_slice::(data)?.id) +}