Add security checks when fetching objects (#95)

* Add security checks when fetching objects

* static

* as ref

* update comment

* fix header

* Update comment
This commit is contained in:
Nutomic 2024-03-04 12:47:42 +01:00 committed by GitHub
parent f907b6efa7
commit a859db05bb
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
4 changed files with 63 additions and 17 deletions

View file

@ -1,13 +1,11 @@
//! Error messages returned by this library //! Error messages returned by this library
use std::string::FromUtf8Error; use crate::fetch::webfinger::WebFingerError;
use http_signature_normalization_reqwest::SignError; use http_signature_normalization_reqwest::SignError;
use openssl::error::ErrorStack; use openssl::error::ErrorStack;
use std::string::FromUtf8Error;
use url::Url; use url::Url;
use crate::fetch::webfinger::WebFingerError;
/// Error messages returned by this library /// Error messages returned by this library
#[derive(thiserror::Error, Debug)] #[derive(thiserror::Error, Debug)]
pub enum Error { pub enum Error {
@ -62,6 +60,14 @@ pub enum Error {
/// Signing errors /// Signing errors
#[error(transparent)] #[error(transparent)]
SignError(#[from] SignError), SignError(#[from] SignError),
/// Attempted to fetch object which doesn't have valid ActivityPub Content-Type
#[error(
"Attempted to fetch object from {0} which doesn't have valid ActivityPub Content-Type"
)]
FetchInvalidContentType(Url),
/// Attempted to fetch object but the response's id field doesn't match
#[error("Attempted to fetch object from {0} but the response's id field doesn't match")]
FetchWrongId(Url),
/// Other generic errors /// Other generic errors
#[error("{0}")] #[error("{0}")]
Other(String), Other(String),

View file

@ -5,12 +5,13 @@
use crate::{ use crate::{
config::Data, config::Data,
error::{Error, Error::ParseFetchedObject}, error::{Error, Error::ParseFetchedObject},
extract_id,
http_signatures::sign_request, http_signatures::sign_request,
reqwest_shim::ResponseExt, reqwest_shim::ResponseExt,
FEDERATION_CONTENT_TYPE, FEDERATION_CONTENT_TYPE,
}; };
use bytes::Bytes; use bytes::Bytes;
use http::StatusCode; use http::{HeaderValue, StatusCode};
use serde::de::DeserializeOwned; use serde::de::DeserializeOwned;
use std::sync::atomic::Ordering; use std::sync::atomic::Ordering;
use tracing::info; use tracing::info;
@ -29,6 +30,8 @@ pub struct FetchObjectResponse<Kind> {
pub object: Kind, pub object: Kind,
/// Contains the final URL (different from request URL in case of redirect) /// Contains the final URL (different from request URL in case of redirect)
pub url: Url, pub url: Url,
content_type: Option<HeaderValue>,
object_id: Option<Url>,
} }
/// Fetch a remote object over HTTP and convert to `Kind`. /// Fetch a remote object over HTTP and convert to `Kind`.
@ -42,12 +45,32 @@ pub struct FetchObjectResponse<Kind> {
/// [Error::RequestLimit]. This prevents denial of service attacks where an attack triggers /// [Error::RequestLimit]. This prevents denial of service attacks where an attack triggers
/// infinite, recursive fetching of data. /// infinite, recursive fetching of data.
/// ///
/// The `Accept` header will be set to the content of [`FEDERATION_CONTENT_TYPE`]. /// The `Accept` header will be set to the content of [`FEDERATION_CONTENT_TYPE`]. When parsing the
/// response it ensures that it has a valid `Content-Type` header as defined by ActivityPub, to
/// prevent security vulnerabilities like [this one](https://github.com/mastodon/mastodon/security/advisories/GHSA-jhrq-qvrm-qr36).
/// Additionally it checks that the `id` field is identical to the fetch URL (after redirects).
pub async fn fetch_object_http<T: Clone, Kind: DeserializeOwned>( pub async fn fetch_object_http<T: Clone, Kind: DeserializeOwned>(
url: &Url, url: &Url,
data: &Data<T>, data: &Data<T>,
) -> Result<FetchObjectResponse<Kind>, Error> { ) -> Result<FetchObjectResponse<Kind>, Error> {
fetch_object_http_with_accept(url, data, FEDERATION_CONTENT_TYPE).await static CONTENT_TYPE: HeaderValue = HeaderValue::from_static(FEDERATION_CONTENT_TYPE);
static ALT_CONTENT_TYPE: HeaderValue = HeaderValue::from_static(
r#"application/ld+json; profile="https://www.w3.org/ns/activitystreams""#,
);
let res = fetch_object_http_with_accept(url, data, &CONTENT_TYPE).await?;
// Ensure correct content-type to prevent vulnerabilities.
if res.content_type.as_ref() != Some(&CONTENT_TYPE)
&& res.content_type.as_ref() != Some(&ALT_CONTENT_TYPE)
{
return Err(Error::FetchInvalidContentType(res.url));
}
// Ensure id field matches final url
if res.object_id.as_ref() != Some(&res.url) {
return Err(Error::FetchWrongId(res.url));
}
Ok(res)
} }
/// Fetch a remote object over HTTP and convert to `Kind`. This function works exactly as /// Fetch a remote object over HTTP and convert to `Kind`. This function works exactly as
@ -55,7 +78,7 @@ pub async fn fetch_object_http<T: Clone, Kind: DeserializeOwned>(
async fn fetch_object_http_with_accept<T: Clone, Kind: DeserializeOwned>( async fn fetch_object_http_with_accept<T: Clone, Kind: DeserializeOwned>(
url: &Url, url: &Url,
data: &Data<T>, data: &Data<T>,
content_type: &str, content_type: &HeaderValue,
) -> Result<FetchObjectResponse<Kind>, Error> { ) -> Result<FetchObjectResponse<Kind>, Error> {
let config = &data.config; let config = &data.config;
// dont fetch local objects this way // dont fetch local objects this way
@ -93,9 +116,17 @@ async fn fetch_object_http_with_accept<T: Clone, Kind: DeserializeOwned>(
} }
let url = res.url().clone(); let url = res.url().clone();
let content_type = res.headers().get("Content-Type").cloned();
let text = res.bytes_limited().await?; let text = res.bytes_limited().await?;
let object_id = extract_id(&text).ok();
match serde_json::from_slice(&text) { match serde_json::from_slice(&text) {
Ok(object) => Ok(FetchObjectResponse { object, url }), Ok(object) => Ok(FetchObjectResponse {
object,
url,
content_type,
object_id,
}),
Err(e) => Err(ParseFetchedObject( Err(e) => Err(ParseFetchedObject(
e, e,
url, url,

View file

@ -5,6 +5,7 @@ use crate::{
traits::{Actor, Object}, traits::{Actor, Object},
FEDERATION_CONTENT_TYPE, FEDERATION_CONTENT_TYPE,
}; };
use http::HeaderValue;
use itertools::Itertools; use itertools::Itertools;
use once_cell::sync::Lazy; use once_cell::sync::Lazy;
use regex::Regex; use regex::Regex;
@ -33,6 +34,9 @@ impl WebFingerError {
} }
} }
/// The content-type for webfinger responses.
pub static WEBFINGER_CONTENT_TYPE: HeaderValue = HeaderValue::from_static("application/jrd+json");
/// Takes an identifier of the form `name@example.com`, and returns an object of `Kind`. /// Takes an identifier of the form `name@example.com`, and returns an object of `Kind`.
/// ///
/// For this the identifier is first resolved via webfinger protocol to an Activitypub ID. This ID /// For this the identifier is first resolved via webfinger protocol to an Activitypub ID. This ID
@ -58,7 +62,7 @@ where
let res: Webfinger = fetch_object_http_with_accept( let res: Webfinger = fetch_object_http_with_accept(
&Url::parse(&fetch_url).map_err(Error::UrlParse)?, &Url::parse(&fetch_url).map_err(Error::UrlParse)?,
data, data,
"application/jrd+json", &WEBFINGER_CONTENT_TYPE,
) )
.await? .await?
.object; .object;

View file

@ -35,7 +35,7 @@ use serde::{de::DeserializeOwned, Deserialize};
use url::Url; use url::Url;
/// Mime type for Activitypub data, used for `Accept` and `Content-Type` HTTP headers /// Mime type for Activitypub data, used for `Accept` and `Content-Type` HTTP headers
pub static FEDERATION_CONTENT_TYPE: &str = "application/activity+json"; pub const FEDERATION_CONTENT_TYPE: &str = "application/activity+json";
/// Deserialize incoming inbox activity to the given type, perform basic /// Deserialize incoming inbox activity to the given type, perform basic
/// validation and extract the actor. /// validation and extract the actor.
@ -53,12 +53,8 @@ where
{ {
let activity: Activity = serde_json::from_slice(body).map_err(|e| { let activity: Activity = serde_json::from_slice(body).map_err(|e| {
// Attempt to include activity id in error message // Attempt to include activity id in error message
#[derive(Deserialize)] let id = extract_id(body).ok();
struct Id { Error::ParseReceivedActivity(e, id)
id: Url,
}
let id = serde_json::from_slice::<Id>(body).ok();
Error::ParseReceivedActivity(e, id.map(|i| i.id))
})?; })?;
data.config.verify_url_and_domain(&activity).await?; data.config.verify_url_and_domain(&activity).await?;
let actor = ObjectId::<ActorT>::from(activity.actor().clone()) let actor = ObjectId::<ActorT>::from(activity.actor().clone())
@ -66,3 +62,12 @@ where
.await?; .await?;
Ok((activity, actor)) Ok((activity, actor))
} }
/// Attempt to parse id field from serialized json
fn extract_id(data: &[u8]) -> serde_json::Result<Url> {
#[derive(Deserialize)]
struct Id {
id: Url,
}
Ok(serde_json::from_slice::<Id>(data)?.id)
}