libreddit/src/client.rs
Daniel Valentine ae3ea2da7c HTTP compression (Reddit -> Libreddit -> client) (#612)
Implements HTTP compression both between Reddit and Libreddit and between
Libreddit and a web browser. Compression between Reddit and Libreddit is
mandatory, whereas compression between Libreddit and a client is opt-in: the
client must name a supported compressor in its Accept-Encoding header.

Supported compressors are gzip and brotli. gzip support is ubiquitous,
whereas brotli is supported by most modern browsers, the notable exception
being Safari (on iOS and macOS), which may gain brotli support in the future.

Co-authored-by: Matthew E <matt@matthew.science>
2022-11-03 22:04:34 -06:00
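
This file implements only the Reddit-to-Libreddit half of that scheme; the
browser-facing negotiation lives elsewhere in the codebase. As a rough sketch
of the opt-in idea only (the helper below is hypothetical, not Libreddit's
actual API): prefer brotli when the client advertises it, fall back to gzip,
and otherwise send the body uncompressed.

// Hypothetical sketch of the Accept-Encoding negotiation described above.
fn choose_compressor(accept_encoding: &str) -> Option<&'static str> {
	// Each list entry looks like "gzip" or "br;q=0.8"; compare the bare token.
	let offered = |name: &str| accept_encoding.split(',').any(|e| e.trim().split(';').next() == Some(name));

	if offered("br") {
		Some("br") // brotli: best compression ratio
	} else if offered("gzip") {
		Some("gzip") // gzip: ubiquitous fallback
	} else {
		None // client didn't opt in; send identity encoding
	}
}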


use cached::proc_macro::cached;
use futures_lite::{future::Boxed, FutureExt};
use hyper::{body, body::Buf, client, header, Body, Request, Response, Uri};
use libflate::gzip;
use percent_encoding::{percent_encode, CONTROLS};
use serde_json::Value;
use std::{io, result::Result};
use crate::server::RequestExt;
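
// Proxy a request to the URL template `format`, substituting each `{name}`
// placeholder with the matching parameter from the incoming request, then
// stream the response back to the client.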
pub async fn proxy(req: Request<Body>, format: &str) -> Result<Response<Body>, String> {
	let mut url = format!("{}?{}", format, req.uri().query().unwrap_or_default());

	// For each parameter in request
	for (name, value) in req.params().iter() {
		// Fill the parameter value in the url
		url = url.replace(&format!("{{{}}}", name), value);
	}

	stream(&url, &req).await
}
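
// Fetch `url` and relay the response, forwarding a few caching-related
// request headers upstream and removing identifying response headers before
// relaying.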
async fn stream(url: &str, req: &Request<Body>) -> Result<Response<Body>, String> {
	// First parameter is target URL (mandatory).
	let uri = url.parse::<Uri>().map_err(|_| "Couldn't parse URL".to_string())?;

	// Prepare the HTTPS connector.
	let https = hyper_rustls::HttpsConnectorBuilder::new().with_native_roots().https_only().enable_http1().build();

	// Build the hyper client from the HTTPS connector.
	let client: client::Client<_, hyper::Body> = client::Client::builder().build(https);

	let mut builder = Request::get(uri);

	// Copy useful headers from original request
	for &key in &["Range", "If-Modified-Since", "Cache-Control"] {
		if let Some(value) = req.headers().get(key) {
			builder = builder.header(key, value);
		}
	}

	let stream_request = builder.body(Body::empty()).map_err(|_| "Couldn't build empty body in stream".to_string())?;

	client
		.request(stream_request)
		.await
		.map(|mut res| {
			let mut rm = |key: &str| res.headers_mut().remove(key);
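
			// Drop response headers the client doesn't need; the x-cdn-* and
			// x-reddit-* entries would reveal details of Reddit's CDN.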
rm("access-control-expose-headers");
rm("server");
rm("vary");
rm("etag");
rm("x-cdn");
rm("x-cdn-client-region");
rm("x-cdn-name");
rm("x-cdn-server-region");
rm("x-reddit-cdn");
rm("x-reddit-video-features");
res
})
.map_err(|e| e.to_string())
}
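
// Issue a GET request to Reddit, following redirects and transparently
// decompressing the gzip-encoded response body. The future is boxed because
// the function recurses to follow redirects, which a plain `async fn` cannot
// do directly.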
fn request(url: String, quarantine: bool) -> Boxed<Result<Response<Body>, String>> {
	// Prepare the HTTPS connector.
	let https = hyper_rustls::HttpsConnectorBuilder::new().with_native_roots().https_or_http().enable_http1().build();

	// Construct the hyper client from the HTTPS connector.
	let client: client::Client<_, hyper::Body> = client::Client::builder().build(https);

	// Build request
	let builder = Request::builder()
		.method("GET")
		.uri(&url)
		.header("User-Agent", format!("web:libreddit:{}", env!("CARGO_PKG_VERSION")))
		.header("Host", "www.reddit.com")
		.header("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8")
		.header("Accept-Encoding", "gzip") // Reddit doesn't do brotli yet.
		.header("Accept-Language", "en-US,en;q=0.5")
		.header("Connection", "keep-alive")
.header("Cookie", if quarantine { "_options=%7B%22pref_quarantine_optin%22%3A%20true%7D" } else { "" })
.body(Body::empty());
async move {
match builder {
Ok(req) => match client.request(req).await {
Ok(mut response) => {
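					// A 3xx status means Reddit redirected us; follow the Location
					// header, appending raw_json=1 so the target returns unescaped JSON.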
					if response.status().to_string().starts_with('3') {
						request(
							response
								.headers()
								.get("Location")
								.map(|val| {
									let new_url = percent_encode(val.as_bytes(), CONTROLS).to_string();
									format!("{}{}raw_json=1", new_url, if new_url.contains('?') { "&" } else { "?" })
								})
								.unwrap_or_default()
								.to_string(),
							quarantine,
						)
						.await
					} else {
						match response.headers().get(header::CONTENT_ENCODING) {
							// Content not compressed.
							None => Ok(response),

							// Content gzipped.
							Some(hdr) => {
								// Since we requested gzipped content, we expect
								// to get back gzipped content. If we get
								// back anything else, that's a problem.
								if hdr.ne("gzip") {
									return Err("Reddit response was encoded with an unsupported compressor".to_string());
								}

								// The body must be something that implements
								// std::io::Read, hence the conversion to
								// bytes::buf::Buf and then transformation into a
								// Reader.
								let mut decompressed: Vec<u8>;
								{
									let mut aggregated_body = match body::aggregate(response.body_mut()).await {
										Ok(b) => b.reader(),
										Err(e) => return Err(e.to_string()),
									};

									let mut decoder = match gzip::Decoder::new(&mut aggregated_body) {
										Ok(decoder) => decoder,
										Err(e) => return Err(e.to_string()),
									};

									decompressed = Vec::<u8>::new();
									match io::copy(&mut decoder, &mut decompressed) {
										Ok(_) => {}
										Err(e) => return Err(e.to_string()),
									};
								}

								response.headers_mut().remove(header::CONTENT_ENCODING);
								response.headers_mut().insert(header::CONTENT_LENGTH, decompressed.len().into());

								*(response.body_mut()) = Body::from(decompressed);

								Ok(response)
							}
						}
					}
				}
				Err(e) => Err(e.to_string()),
			},
			Err(_) => Err("Post url contains non-ASCII characters".to_string()),
		}
	}
	.boxed()
}
// Make a request to a Reddit API and parse the JSON response
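// Cache up to 100 responses for 30 seconds each, so repeated hits on the same
// path within that window don't each go out to Reddit.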
#[cached(size = 100, time = 30, result = true)]
pub async fn json(path: String, quarantine: bool) -> Result<Value, String> {
	// Build Reddit url from path
	let url = format!("https://www.reddit.com{}", path);

	// Closure to quickly build errors
	let err = |msg: &str, e: String| -> Result<Value, String> {
		// eprintln!("{} - {}: {}", url, msg, e);
		Err(format!("{}: {}", msg, e))
	};

	// Fetch the url...
	match request(url.clone(), quarantine).await {
		Ok(response) => {
			let status = response.status();

			// asynchronously aggregate the chunks of the body
			match hyper::body::aggregate(response).await {
				Ok(body) => {
					// Parse the response from Reddit as JSON
					match serde_json::from_reader(body.reader()) {
						Ok(value) => {
							let json: Value = value;
							// If Reddit returned an error
							if json["error"].is_i64() {
								Err(
									json["reason"]
										.as_str()
										.unwrap_or_else(|| {
											json["message"].as_str().unwrap_or_else(|| {
												eprintln!("{} - Error parsing reddit error", url);
												"Error parsing reddit error"
											})
										})
										.to_string(),
								)
							} else {
								Ok(json)
							}
						}
						Err(e) => {
							if status.is_server_error() {
								Err("Reddit is having issues, check if there's an outage".to_string())
							} else {
								err("Failed to parse page JSON data", e.to_string())
							}
						}
					}
				}
				Err(e) => err("Failed receiving body from Reddit", e.to_string()),
			}
		}
		Err(e) => err("Couldn't send request to Reddit", e),
	}
}