text & doc support (+dynamic)

This commit is contained in:
Breval Ferrari 2025-04-06 10:41:22 -04:00
parent ae6e2edafa
commit 0c3439d788
No known key found for this signature in database
GPG key ID: F71E304D6400AB8E
6 changed files with 157 additions and 5 deletions

1
.gitignore vendored
View file

@ -1,3 +1,4 @@
/target
*.lock
bmp/out.*
fonts

View file

@ -34,6 +34,8 @@ printpdf = { version = "0.8.2", features = [
"js-sys",
"webp",
] }
shiva = "1.4.9"
anyhow = "1.0.97"
[dev-dependencies]
project-root = "0"

View file

@ -11,10 +11,12 @@ mod dynamic {
fs::File,
io::{self, Read, Write},
path::Path,
string::FromUtf8Error,
};
use crate::{
snd::{self, Audio},
txt::{ShivaDocument, Text},
Bendable, IntoDataBytes, TryFromDataBytes,
};
@ -25,20 +27,34 @@ mod dynamic {
use infer::MatcherType;
use printpdf::PdfDocument;
use shiva::core::{bytes, Document, DocumentType};
use strum::EnumDiscriminants;
use thiserror::Error;
#[derive(EnumDiscriminants)]
#[strum_discriminants(name(Format))]
pub enum DynamicBendable {
pub enum DynamicBendable<'a> {
Image(DynamicImage),
Binary(Bytes),
Sound(Audio),
Text,
Text(Text<'a>),
Doc(ShivaDocument),
Archive(PdfDocument),
Meta,
}
/// Error raised when a file extension has no matching Shiva `DocumentType`
/// (i.e. `DocumentType::from_extension` returned `None`).
#[derive(Debug, Error)]
#[error("extension is unknown by Shiva")]
pub struct ShivaUnknownExtensionError;
/// Failures that can occur while loading a document through Shiva.
///
/// Both variants display exactly the message of the error they wrap.
#[derive(Debug, Error)]
pub enum ShivaError {
/// The file extension did not map to a Shiva document type.
#[error("{0}")]
UnknownExtension(#[from] ShivaUnknownExtensionError),
/// An `anyhow::Error`, such as the one returned by `Document::parse`.
#[error("{0}")]
Anyhow(#[from] anyhow::Error),
}
#[derive(Debug, Error)]
pub enum OpenError {
#[error("io: {0}")]
@ -49,6 +65,10 @@ mod dynamic {
Audio(#[from] snd::AudioOpenError),
#[error("pdf: {0}")]
Pdf(String),
#[error("text: {0}")]
Text(#[from] FromUtf8Error),
#[error("document: {0}")]
Document(#[from] ShivaError),
}
impl TryFromDataBytes for File {
@ -91,7 +111,7 @@ mod dynamic {
}
}
pub fn open<P: AsRef<Path>>(path: P) -> Result<Option<DynamicBendable>, OpenError> {
pub fn open<P: AsRef<Path>>(path: P) -> Result<Option<DynamicBendable<'static>>, OpenError> {
use MatcherType::*;
infer::get_from_path(&path)?
.map(|t| (t.matcher_type(), t.extension()))
@ -106,7 +126,7 @@ mod dynamic {
})),
App => unreachable!(),
Audio => Some(DynamicBendable::Sound(crate::snd::Audio::open(path)?)),
Archive => Some(DynamicBendable::Archive(
Archive if extension == "pdf" => Some(DynamicBendable::Archive(
PdfDocument::try_from_data_bytes(
File::open(path)?
.bytes()
@ -116,10 +136,31 @@ mod dynamic {
)
.map_err(OpenError::Pdf)?,
)),
Archive => {
let document_type = DocumentType::from_extension(extension)
.ok_or(ShivaUnknownExtensionError)
.map_err(ShivaError::UnknownExtension)?;
Some(DynamicBendable::Doc(ShivaDocument::new(
Document::parse(
&bytes::Bytes::from(std::fs::read(path)?),
document_type,
)
.map_err(ShivaError::Anyhow)?,
document_type,
)))
}
Book => todo!(),
Doc => todo!(),
Font => todo!(),
Text => todo!(),
Text => Some(DynamicBendable::Text(
crate::txt::Text::try_from_data_bytes(
File::open(path)?
.bytes()
.collect::<Result<Bytes, io::Error>>()?,
(),
Default::default(),
)?,
)),
Video => todo!(),
Custom => None,
})

View file

@ -0,0 +1,4 @@
mod bare;
pub use bare::*;
mod shiva;
pub use shiva::*;

38
bingus/src/txt/bare.rs Normal file
View file

@ -0,0 +1,38 @@
use std::{borrow::Cow, string::FromUtf8Error};
use crate::{Bendable, Bytes, IntoDataBytes, TryFromDataBytes};
/// Plain text handled as bendable data.
///
/// This is an alias for [`Cow<str>`], so a value may either borrow its string
/// or own it.
pub type Text<'a> = Cow<'a, str>;
impl TryFromDataBytes for Text<'_> {
type Error = FromUtf8Error;
type Format = ();
fn try_from_data_bytes(
bytes: Bytes,
_format: Self::Format,
_crop: crate::Crop,
) -> Result<Self, Self::Error>
where
Self: Sized,
{
String::from_utf8(bytes).map(Into::into)
}
}
/// Serializes [`Text`] back into its UTF-8 bytes.
impl IntoDataBytes for Text<'_> {
    /// Consumes the text and returns its UTF-8 encoding.
    ///
    /// Owned text hands over its existing buffer without copying; only
    /// borrowed text has to be copied into a fresh allocation.
    fn into_data_bytes(self) -> Bytes {
        self.into_owned().into_bytes()
    }
}
impl Bendable for Text<'_> {
type Unit = char;
fn map<F: Fn(&Self::Unit) -> Self::Unit + Sync>(self, f: F) -> Self {
self.chars().map(|c| f(&c)).collect::<String>().into()
}
fn format() -> crate::Format {
crate::Format::Text
}
}

66
bingus/src/txt/shiva.rs Normal file
View file

@ -0,0 +1,66 @@
use derive_new::new;
pub use shiva::core::DocumentType;
use shiva::core::{bytes::Bytes, Document, Element};
use crate::{Bendable, IntoDataBytes, TryFromDataBytes};
/// A parsed Shiva document paired with the format it is serialized back into.
///
/// `#[derive(new)]` generates `ShivaDocument::new(document, output_format)`.
#[derive(new)]
pub struct ShivaDocument {
/// The parsed document.
document: Document,
/// Format handed to `Document::generate` when converting back to bytes.
output_format: DocumentType,
}
/// Format options for loading a [`ShivaDocument`] from raw bytes.
///
/// `#[derive(new)]` generates `ShivaFormat::new(input_format, output_format)`.
#[derive(new)]
pub struct ShivaFormat {
/// Format the incoming bytes are parsed as.
input_format: DocumentType,
/// Format stored on the resulting document for later serialization.
output_format: DocumentType,
}
impl TryFromDataBytes for ShivaDocument {
type Error = anyhow::Error;
type Format = ShivaFormat;
fn try_from_data_bytes(
bytes: crate::Bytes,
format: Self::Format,
_crop: crate::Crop,
) -> Result<Self, Self::Error>
where
Self: Sized,
{
Ok(ShivaDocument::new(
Document::parse(&Bytes::from(bytes), format.input_format)?,
format.output_format,
))
}
}
/// Serializes a [`ShivaDocument`] into bytes of its stored output format.
impl IntoDataBytes for ShivaDocument {
    /// Generates the document in `output_format` and returns the bytes.
    ///
    /// # Panics
    ///
    /// Panics if `Document::generate` returns an error; the trait signature
    /// leaves no way to report the failure to the caller.
    fn into_data_bytes(self) -> crate::Bytes {
        let Self {
            document,
            output_format,
        } = self;
        let generated = document
            .generate(output_format)
            .expect("can't crash here! so close!");
        generated.to_vec()
    }
}
/// Bends a document one [`Element`] at a time.
impl Bendable for ShivaDocument {
    /// Documents are processed per Shiva element.
    type Unit = Element;

    /// Applies `f` to every element yielded by `get_all_elements` and builds
    /// a new document from the results, keeping the same output format.
    fn map<F: Fn(&Self::Unit) -> Self::Unit + Sync>(self, f: F) -> Self {
        let Self {
            document,
            output_format,
        } = self;
        let bent = Document::new(document.get_all_elements().into_iter().map(f).collect());
        Self {
            document: bent,
            output_format,
        }
    }

    /// Reports this data as the text format.
    // NOTE(review): a document reporting `Format::Text` may be unintended;
    // confirm whether `crate::Format` has a dedicated document variant.
    fn format() -> crate::Format {
        crate::Format::Text
    }
}