Skip to content

Implement a File Link Resolver #5981

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 4 commits into
base: krishna/refactor-main
Choose a base branch
from
223 changes: 223 additions & 0 deletions graph/src/components/link_resolver/file.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,223 @@
use std::path::{Path, PathBuf};
use std::time::Duration;

use anyhow::anyhow;
use async_trait::async_trait;
use slog::Logger;

use crate::data::subgraph::Link;
use crate::prelude::{Error, JsonValueStream, LinkResolver as LinkResolverTrait};

/// A link resolver that reads linked files from the local filesystem.
#[derive(Clone, Debug)]
pub struct FileLinkResolver {
    /// Directory that non-absolute links are joined onto; `None` means links
    /// are used as paths exactly as given.
    base_dir: Option<PathBuf>,
    /// Timeout carried by this resolver.
    timeout: Duration,
}

impl FileLinkResolver {
    /// Timeout assigned by both constructors.
    const DEFAULT_TIMEOUT: Duration = Duration::from_secs(30);

    /// Create a new FileLinkResolver without a base directory.
    ///
    /// Every link is used as a filesystem path exactly as given; no
    /// directory is prepended to it.
    pub fn new() -> Self {
        Self {
            base_dir: None,
            timeout: Self::DEFAULT_TIMEOUT,
        }
    }

    /// Create a new FileLinkResolver with a base directory
    ///
    /// All paths that are not absolute will be considered
    /// relative to this base directory.
    pub fn with_base_dir<P: AsRef<Path>>(base_dir: P) -> Self {
        Self {
            base_dir: Some(base_dir.as_ref().to_owned()),
            timeout: Self::DEFAULT_TIMEOUT,
        }
    }

    /// Turns `link` into the path that should be read.
    ///
    /// Absolute links, and any link when no base directory is configured,
    /// are returned unchanged; everything else is joined onto the base
    /// directory.
    fn resolve_path(&self, link: &str) -> PathBuf {
        let path = Path::new(link);

        match &self.base_dir {
            Some(base_dir) if !path.is_absolute() => base_dir.join(path),
            _ => path.to_owned(),
        }
    }
}

impl Default for FileLinkResolver {
    /// Equivalent to [`FileLinkResolver::new`].
    fn default() -> Self {
        Self::new()
    }
}

/// Strips a leading `/ipfs/` from `link`.
///
/// Returns the remainder after the prefix, or `link` unchanged when it does
/// not start with `/ipfs/`.
pub fn remove_prefix(link: &str) -> &str {
    link.strip_prefix("/ipfs/").unwrap_or(link)
}

#[async_trait]
impl LinkResolverTrait for FileLinkResolver {
    /// Returns a boxed copy of this resolver that carries `timeout` instead
    /// of the current value.
    fn with_timeout(&self, timeout: Duration) -> Box<dyn LinkResolverTrait> {
        Box::new(Self {
            timeout,
            ..self.clone()
        })
    }

    /// Returns a boxed copy of this resolver; nothing else is changed.
    fn with_retries(&self) -> Box<dyn LinkResolverTrait> {
        let copy = self.clone();
        Box::new(copy)
    }

    /// Reads the whole file that `link` points to.
    ///
    /// A leading `/ipfs/` is dropped from the link before it is resolved to
    /// a path. Read failures are logged at error level and returned as an
    /// error that names the path.
    async fn cat(&self, logger: &Logger, link: &Link) -> Result<Vec<u8>, Error> {
        let target = self.resolve_path(remove_prefix(&link.link));

        slog::debug!(logger, "File resolver: reading file";
            "path" => target.to_string_lossy().to_string());

        tokio::fs::read(&target).await.map_err(|io_err| {
            slog::error!(logger, "Failed to read file";
                "path" => target.to_string_lossy().to_string(),
                "error" => io_err.to_string());
            anyhow!("Failed to read file {}: {}", target.display(), io_err).into()
        })
    }

    /// Always fails: block retrieval is not supported by this resolver.
    async fn get_block(&self, _logger: &Logger, _link: &Link) -> Result<Vec<u8>, Error> {
        let unsupported = anyhow!("get_block is not implemented for FileLinkResolver");
        Err(unsupported.into())
    }

    /// Always fails: JSON streaming is not supported by this resolver.
    async fn json_stream(&self, _logger: &Logger, _link: &Link) -> Result<JsonValueStream, Error> {
        let unsupported = anyhow!("json_stream is not implemented for FileLinkResolver");
        Err(unsupported.into())
    }
}

#[cfg(test)]
mod tests {
    use super::*;
    use std::env;
    use std::fs;

    /// Creates a scratch directory under the system temp dir.
    ///
    /// The process id is part of the name so that concurrent test runs on
    /// the same machine do not share (and delete) each other's files.
    /// Failing to create the directory aborts the test instead of being
    /// silently ignored.
    fn scratch_dir(name: &str) -> PathBuf {
        let dir = env::temp_dir().join(format!("{}_{}", name, std::process::id()));
        fs::create_dir_all(&dir).expect("failed to create scratch directory");
        dir
    }

    /// Builds a [`Link`] from a plain string.
    fn make_link(target: &str) -> Link {
        Link {
            link: target.to_string(),
        }
    }

    /// Resolver without a base directory: links are used as given.
    #[tokio::test]
    async fn test_file_resolver_absolute() {
        let temp_dir = scratch_dir("file_resolver_test");
        let test_file_path = temp_dir.join("test.txt");
        let test_content = b"Hello, world!";
        fs::write(&test_file_path, test_content).unwrap();

        let resolver = FileLinkResolver::new();
        let logger = slog::Logger::root(slog::Discard, slog::o!());

        // An existing absolute path is returned unchanged and can be read.
        let link = make_link(&test_file_path.to_string_lossy());
        assert_eq!(
            resolver.resolve_path(&link.link),
            PathBuf::from(&link.link)
        );
        let result = resolver.cat(&logger, &link).await.unwrap();
        assert_eq!(result, test_content);

        // A path with a leading slash that likely doesn't exist.
        let link = make_link("/test.txt");
        assert_eq!(
            resolver.resolve_path(&link.link),
            PathBuf::from("/test.txt")
        );
        let result = resolver.cat(&logger, &link).await;
        assert!(
            result.is_err(),
            "Reading /test.txt should fail as it doesn't exist"
        );

        // Best-effort clean up; a failure here must not fail the test.
        let _ = fs::remove_dir_all(&temp_dir);
    }

    /// Resolver with a base directory: relative links are joined onto it.
    #[tokio::test]
    async fn test_file_resolver_with_base_dir() {
        let temp_dir = scratch_dir("file_resolver_test_base_dir");
        let test_file_path = temp_dir.join("test.txt");
        let test_content = b"Hello from base dir!";
        fs::write(&test_file_path, test_content).unwrap();

        let resolver = FileLinkResolver::with_base_dir(&temp_dir);
        let logger = slog::Logger::root(slog::Discard, slog::o!());

        // Relative path (no leading slash) resolves inside the base directory.
        let link = make_link("test.txt");
        assert_eq!(resolver.resolve_path(&link.link), test_file_path);
        let result = resolver.cat(&logger, &link).await.unwrap();
        assert_eq!(result, test_content);

        // A leading slash makes the path absolute on Unix, so the base
        // directory is not applied there.
        if cfg!(unix) {
            let link = make_link("/test.txt");
            assert_eq!(
                resolver.resolve_path(&link.link),
                PathBuf::from("/test.txt")
            );
        }

        // An absolute path bypasses the base directory and is still readable.
        let link = make_link(&test_file_path.to_string_lossy());
        assert_eq!(
            resolver.resolve_path(&link.link),
            PathBuf::from(&link.link)
        );
        let result = resolver.cat(&logger, &link).await.unwrap();
        assert_eq!(result, test_content);

        // Missing file inside the base directory.
        let link = make_link("missing.txt");
        assert_eq!(
            resolver.resolve_path(&link.link),
            temp_dir.join("missing.txt")
        );
        let result = resolver.cat(&logger, &link).await;
        assert!(result.is_err());

        // Best-effort clean up; a failure here must not fail the test.
        let _ = fs::remove_dir_all(&temp_dir);
    }
}
2 changes: 2 additions & 0 deletions graph/src/components/link_resolver/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,12 @@ use crate::prelude::Error;
use std::fmt::Debug;

mod arweave;
mod file;
mod ipfs;

pub use arweave::*;
use async_trait::async_trait;
pub use file::*;
pub use ipfs::*;

/// Resolves links to subgraph manifests and resources referenced by them.
Expand Down
86 changes: 71 additions & 15 deletions graph/src/data/subgraph/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -116,21 +116,24 @@ impl DeploymentHash {
/// Builds a `DeploymentHash` from `s`, handing the string back as `Err` when it is rejected.
///
/// NOTE(review): in this view each validation (the 46-byte length limit, the ASCII
/// alphanumeric/underscore character set, and the reserved name "subgraphs") appears
/// both as live code and as a commented-out copy, next to a note saying the checks are
/// temporarily disabled for the file link resolver. Confirm which form is intended
/// before relying on either behaviour.
pub fn new(s: impl Into<String>) -> Result<Self, String> {
let s = s.into();

// Enforce length limit
if s.len() > 46 {
return Err(s);
}
// This section is being temporarily commented out. This is to allow file link resolver to work
// TODO(krishna): Figure out how to do this better or remove this check

// Check that the ID contains only allowed characters.
if !s.chars().all(|c| c.is_ascii_alphanumeric() || c == '_') {
return Err(s);
}
// // Enforce length limit
// if s.len() > 46 {
// return Err(s);
// }

// Allow only deployment id's for 'real' subgraphs, not the old
// metadata subgraph.
if s == "subgraphs" {
return Err(s);
}
// // Check that the ID contains only allowed characters.
// if !s.chars().all(|c| c.is_ascii_alphanumeric() || c == '_') {
// return Err(s);
// }

// // Allow only deployment id's for 'real' subgraphs, not the old
// // metadata subgraph.
// if s == "subgraphs" {
// return Err(s);
// }

Ok(DeploymentHash(s))
}
Expand Down Expand Up @@ -397,12 +400,65 @@ impl From<HashMap<Word, Value>> for DataSourceContext {
}

/// IPLD link.
///
/// NOTE(review): two `derive` attributes are visible here, one listing `Deserialize`
/// and one without it — confirm which one applies. The `#[serde(rename = "/")]`
/// attribute below presumably requires a serde derive to remain on the type; verify.
#[derive(Clone, Debug, Default, Hash, Eq, PartialEq, Deserialize)]
#[derive(Clone, Debug, Default, Hash, Eq, PartialEq)]
pub struct Link {
/// The link target as a string; the serde attribute maps it to the `/` key.
#[serde(rename = "/")]
pub link: String,
}

/// Custom deserializer for Link
/// This handles both formats:
/// 1. Simple string: "schema.graphql" or "subgraph.yaml" which is used in [`FileLinkResolver`]
/// FileLinkResolver is used in local development environments
/// 2. IPLD format: { "/": "Qm..." } which is used in [`IpfsLinkResolver`]
impl<'de> de::Deserialize<'de> for Link {
fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
where
D: de::Deserializer<'de>,
{
struct LinkVisitor;

impl<'de> de::Visitor<'de> for LinkVisitor {
type Value = Link;

fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
formatter.write_str("string or map with '/' key")
}

fn visit_str<E>(self, value: &str) -> Result<Link, E>
where
E: de::Error,
{
Ok(Link {
link: value.to_string(),
})
}

fn visit_map<A>(self, mut map: A) -> Result<Link, A::Error>
where
A: de::MapAccess<'de>,
{
let mut link = None;

while let Some(key) = map.next_key::<String>()? {
if key == "/" {
if link.is_some() {
return Err(de::Error::duplicate_field("/"));
}
link = Some(map.next_value()?);
} else {
return Err(de::Error::unknown_field(&key, &["/"]));
}
}

link.map(|l: String| Link { link: l })
.ok_or_else(|| de::Error::missing_field("/"))
}
}

deserializer.deserialize_any(LinkVisitor)
}
}

impl<S: ToString> From<S> for Link {
fn from(s: S) -> Self {
Self {
Expand Down
15 changes: 15 additions & 0 deletions tests/runner-tests/file-link-resolver/abis/Contract.abi
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
[
{
"anonymous": false,
"inputs": [
{
"indexed": false,
"internalType": "string",
"name": "testCommand",
"type": "string"
}
],
"name": "TestEvent",
"type": "event"
}
]
13 changes: 13 additions & 0 deletions tests/runner-tests/file-link-resolver/package.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
{
"name": "file-link-resolver",
"version": "0.1.0",
"scripts": {
"codegen": "graph codegen --skip-migrations",
"create:test": "graph create test/file-link-resolver --node $GRAPH_NODE_ADMIN_URI",
"deploy:test": "graph deploy test/file-link-resolver --version-label v0.0.1 --ipfs $IPFS_URI --node $GRAPH_NODE_ADMIN_URI"
},
"devDependencies": {
"@graphprotocol/graph-cli": "0.60.0",
"@graphprotocol/graph-ts": "0.31.0"
}
}
5 changes: 5 additions & 0 deletions tests/runner-tests/file-link-resolver/schema.graphql
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# Entity with three required fields: an ID, a number and a hash.
type Block @entity {
id: ID!
number: BigInt!
hash: Bytes!
}
11 changes: 11 additions & 0 deletions tests/runner-tests/file-link-resolver/src/mapping.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
import { ethereum, log } from "@graphprotocol/graph-ts";
import { Block } from "../generated/schema";

/**
 * Block handler: logs the block number, then saves a `Block` entity whose id
 * is the block number rendered as a string.
 */
export function handleBlock(block: ethereum.Block): void {
  const blockNumber = block.number;
  log.info("Processing block: {}", [blockNumber.toString()]);

  const entity = new Block(blockNumber.toString());
  entity.number = blockNumber;
  entity.hash = block.hash;
  entity.save();
}
Loading