my attempt at a subtitle utility after all the existing ones I tried failed me
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
srtune/src/main.rs

714 lines
27 KiB

#[macro_use]
extern crate log;
#[macro_use]
extern crate lazy_static;
use regex::Regex;
use std::fs::{File, OpenOptions};
use std::io::{BufReader, Write};
use std::io;
use std::str::FromStr;
use std::convert::TryFrom;
use std::io::BufRead;
use std::ops::{Add, Mul, MulAssign, AddAssign, SubAssign, Sub};
use std::fmt::{self, Display};
use serde::export::fmt::Debug;
/// Log level names ordered from least to most verbose; `main` starts at "info" and moves one
/// entry further down this list for every `-v` given on the command line.
const LOG_LEVELS: [&str; 5] = ["error", "warn", "info", "debug", "trace"];
/// Module names whose log filter `main` pins to `Info`, independent of the selected verbosity.
const SPAMMY_LIBS: [&str; 5] = ["tokio_reactor", "hyper", "reqwest", "mio", "want"];
fn main() {
let argv =
clap::App::new("srtune")
.version(env!("CARGO_PKG_VERSION"))
.about("Modify a .srt file to match a video. Input and output can be a file or stream, \
so you pipe multiple invocations to create more complex operations. However, a single \
invocation should suffice in most cases.\n\
Times can be specified in any format: \
seconds (400, 14.52), hours:minutes:seconds (14:00, 15:51.12, 1:30:00). Decimal point \
can be period or comma; Times copied directly from the .srt file will also work.\n\
When a command allows both time and index as a value, index must be prefixed with '@'.\
The tool should be used iteratively, adjusting the invocation until the generated \
subtitle file meets expectations. As such, times and indices accepted by its parameters \
are, by default, the ones seen in the output file. Prefix a time or index with '^' \
to use the original value from the input instead (i.e. original index 14 is '^@14')
")
.arg(clap::Arg::with_name("input")
.value_name("INFILE")
.help("Input file, leave out for stdin"),
)
.arg(clap::Arg::with_name("output")
.short("o")
.long("output")
.value_name("OUTFILE")
.help("Output file, defaults to stdout"),
)
.arg(clap::Arg::with_name("from")
.short("f")
.long("from-time")
.value_name("TIME")
.help("Time of the first affected entry, or its index. Defaults to 0/^@0."),
)
.arg(clap::Arg::with_name("move")
.short("m")
.long("move")
.value_name("TIME")
.help("Move subtitles in time. Use seconds, M:S or H:M:S, decimals and minus \
are supported. Starts at the point specified by '--from'"),
)
.arg(clap::Arg::with_name("automove")
.short("M")
.long("automove")
.value_name("SUBTIME=VIDEOTIME")
.multiple(true)
.help("Move subtitles following a given time or index to match video times. \
This automatically sets '--from' if not given explicitly."),
)
.arg(clap::Arg::with_name("scale")
.short("s")
.long("scale")
.value_name("RATIO")
.help("Scale subtitle times and durations to compensate for bitrate \
differences. 1 means identity, 1.1 makes all times 10% longer. Scaling is \
relative to the first emitted subtitle with positive time (after shifting). \
Has no effect if '--autoscale' is used."),
)
.arg(clap::Arg::with_name("autoscale")
.short("S")
.long("autoscale")
.value_name("SUBTIME=VIDEOTIME")
.help("Calculate scaling based on a perceived difference. The scaling is \
related to the first emitted subtitle, so ensure it is aligned properly \
with '--move'."),
)
.arg(clap::Arg::with_name("durscale")
.short("d")
.long("durscale")
.value_name("RATIO")
.help("Scale durations, can be combined with '--scale' or '--autoscale'. The \
given value will always be multiplied by the absolute time scale. 1 means \
identity, 1.1 makes all times 10% longer."),
)
.arg(clap::Arg::with_name("renumber")
.short("r")
.long("renumber")
.help("Change all numbers to be sequential starting with 1"),
)
.arg(clap::Arg::with_name("v").short("v").multiple(true).help(
"Sets the level of verbosity (adds to the default - info)",
))
.get_matches();
let mut log_level = "info".to_owned();
if argv.is_present("v") {
// bump verbosity if -v's are present
let pos = LOG_LEVELS
.iter()
.position(|x| x == &log_level)
.unwrap();
log_level = match LOG_LEVELS
.iter()
.nth(pos + argv.occurrences_of("v") as usize)
{
Some(new_level) => new_level.to_string(),
None => "trace".to_owned(),
};
}
//println!("LEVEL={}", log_level);
// init logging
let env = env_logger::Env::default().default_filter_or(log_level);
let mut builder = env_logger::Builder::from_env(env);
let lib_level = log::LevelFilter::Info;
for lib in &SPAMMY_LIBS {
builder.filter_module(lib, lib_level);
}
builder.init();
let from = match argv.value_of("from") {
Some(mut s) => {
if s.starts_with('^') {
s = &s[1..];
if s.starts_with('@') {
let index = &s[1..].parse().expect("Bad --from format");
FromTag::ByIndexOrig(*index)
} else {
// this is always the orig time
FromTag::ByTime(SubDuration::try_from(s).expect("Bad --from format").as_instant())
}
} else {
if s.starts_with('@') {
let index = &s[1..].parse().expect("Bad --from format");
FromTag::ByIndex(*index)
} else{
FromTag::ByTime(SubDuration::try_from(s).expect("Bad --from format").as_instant())
}
}
}
None => FromTag::ByIndex(0)
};
let shift = match argv.value_of("move") {
Some(s) => {
SubDuration::try_from(s).expect("Bad --move format")
}
None => SubDuration(0f32)
};
let scale = match argv.value_of("scale") {
Some(s) => {
s.parse().expect("Bad --scale format")
}
None => 1f32
};
let durscale = match argv.value_of("durscale") {
Some(s) => {
s.parse().expect("Bad --durscale format")
}
None => 1f32
}; // always also shrink durations
let autoscale = match argv.value_of("autoscale") {
Some(s) => {
let halves : Vec<&str> = s.split("=").collect();
if halves.len() != 2 {
panic!("Bad --autoscale format, should be SUBTIME=VIDEOTIME")
}
let (first, second) = (halves[0], halves[1]);
if first.starts_with('^') {
panic!("'--autoscale' always uses original times");
}
let subtime = SubDuration::try_from(first).expect("Bad --autoscale format").as_instant();
let vidtime = SubDuration::try_from(second).expect("Bad --autoscale format").as_instant();
Some((subtime, vidtime))
}
None => None
};
let mut automove = Vec::<AutoMoveTag>::new();
match argv.values_of("automove") {
Some(ss) => {
for s in ss {
let halves: Vec<&str> = s.split("=").collect();
if halves.len() != 2 {
panic!("Bad --automove format, should be SUBTIME=VIDEOTIME")
}
let (mut first, second) = (halves[0], halves[1]);
let vidtime = SubDuration::try_from(second).expect("Bad --automove format").as_instant();
if first.starts_with('^') {
first = &first[1..];
if s.starts_with('@') {
let index = &first[1..].parse().expect("Bad --automove format");
automove.push(AutoMoveTag::ByIndexOrig(*index, vidtime));
} else {
let subtime = SubDuration::try_from(first).expect("Bad --automove format").as_instant();
automove.push(AutoMoveTag::ByTimeOrig(subtime, vidtime));
}
} else {
if s.starts_with('@') {
let index = &first[1..].parse().expect("Bad --automove format");
automove.push(AutoMoveTag::ByIndex(*index, vidtime));
} else{
let subtime = SubDuration::try_from(first).expect("Bad --automove format").as_instant();
automove.push(AutoMoveTag::ByTime(subtime, vidtime));
}
}
}
}
None => (/* no automoves */)
}
let inf = argv.value_of("input");
let outf = argv.value_of("output");
let stdin = io::stdin();
let stdout = io::stdout();
let mut lines_iterator: Box<dyn Iterator<Item=Result<String, io::Error>>> = match inf {
None => {
Box::new(stdin.lock().lines())
}
Some(f) => {
let file = File::open(f).expect(&format!("Could not open file: {:?}", f));
Box::new(BufReader::new(file).lines())
}
};
let mut outfile: Box<dyn Write> = match outf {
None => {
Box::new(stdout.lock())
}
Some(f) => {
let file = OpenOptions::new()
.create(true)
.truncate(true)
.write(true)
.open(f)
.expect(&format!("Could not open file: {:?}", f));
Box::new(file)
}
};
let renumber = argv.is_present("renumber");
transform_subtitles(&mut lines_iterator, &mut outfile, TransformOpts {
renumber,
autoscale,
durscale,
scale,
shift,
from,
automove,
});
}
/// Options for one `transform_subtitles` run, assembled in `main` from the command line.
#[derive(Debug)]
struct TransformOpts {
// When true, emitted entries are numbered from a running counter instead of keeping their input numbers.
renumber: bool,
// Optional (subtitle time, video time) pair; when present, `scale` is recomputed from it at the first affected entry.
autoscale: Option<(SubInstant, SubInstant)>,
// Factor applied to every affected duration; it is multiplied by `scale` once, at the first affected entry.
durscale: f32,
// Factor applied to start times, measured from the start of the first affected entry.
scale: f32,
// Offset added to the start time of every affected entry.
shift: SubDuration,
// Automove rules, evaluated in order for every affected entry.
automove: Vec<AutoMoveTag>,
// Condition that decides from which entry on the edits apply.
from: FromTag,
}
/// One `--automove` rule. `main` builds the first four variants as
/// (subtitle position, video time); `ByIndexRelative` is only created by `transform_subtitles`.
#[derive(Debug)]
enum AutoMoveTag {
// Subtitle time compared against the already transformed start time.
ByTime(SubInstant, SubInstant),
// Subtitle time compared against the start time read from the input ('^' prefix).
ByTimeOrig(SubInstant, SubInstant),
// Entry index compared against the emitted entry number.
ByIndex(u32, SubInstant),
// Entry index compared against the entry number read from the input ('^' prefix).
ByIndexOrig(u32, SubInstant),
// Internal replacement for an index rule once it has triggered: (emitted number it triggered at, offset to add from there on).
ByIndexRelative(u32, SubDuration),
}
/// Start condition for editing (`--from`): an entry is affected when it is at or past this point.
#[derive(Debug)]
enum FromTag {
// Compared against the entry's start time as read from the input.
ByTime(SubInstant),
// Compared against the emitted entry number.
ByIndex(u32),
// Compared against the entry number as read from the input.
ByIndexOrig(u32)
}
/// Mutable state carried between entries by `transform_subtitles`; it is `Copy` so a snapshot
/// can be taken before an entry and restored when that entry is discarded.
#[derive(Debug,Default,Clone,Copy)]
struct IterState {
// Input start time of the first affected entry; `None` until editing has started. Used as the scaling anchor.
start_time : Option<SubInstant>,
// Last number handed out when renumbering is enabled (0 before the first entry).
renumber_i : u32,
}
fn transform_subtitles<'a>(lines : &mut Box<dyn Iterator<Item=Result<String, io::Error>> + 'a>,
outfile : &mut Box<dyn Write + 'a>,
mut opts : TransformOpts) {
debug!("Opts: {:#?}", opts);
let mut istate = IterState::default();
let mut linebuf = vec![];
while let Some(Ok(x)) = lines.next() {
let mut x = x.trim();
if x.starts_with('\u{feff}') {
debug!("Stripping BOM mark");
x = &x[3..];
}
let x = x.trim();
if x.is_empty() {
continue;
}
let istate_backup = istate;
// 236
// 00:18:01,755 --> 00:18:03,774
// (掃除機の音)
// う~ん…。
match u32::from_str(x) {
Ok(num_orig) => {
// println!("Entry {}", num);
let datesrow = lines.next().unwrap().unwrap();
if datesrow.contains(" --> ") {
let mut halves = datesrow.split(" --> ");
let (first, second) = (halves.next().unwrap(), halves.next().unwrap());
let sub_start = SubInstant::try_from(first).unwrap();
let sub_end = SubInstant::try_from(second).unwrap();
linebuf.clear();
while let Some(Ok(x)) = lines.next() {
if x.is_empty() {
break; // space between the entries
}
linebuf.push(x);
}
let num_new = if opts.renumber {
istate.renumber_i += 1;
istate.renumber_i
} else {
num_orig
};
// advance numbering only for the really emitted entries
let mut subtitle = Subtitle {
num : num_new,
start: sub_start,
dur: sub_end - sub_start,
text: linebuf.join("\n"),
};
if match opts.from {
FromTag::ByTime(ins) => ins <= sub_start,
FromTag::ByIndex(idx) => idx <= num_new,
FromTag::ByIndexOrig(idx) => idx <= num_orig,
} {
if istate.start_time.is_none() {
debug!("Scaling anchored at {} (#{}), editing starts", sub_start, num_orig);
debug!("Shifting by: {}", opts.shift);
istate.start_time = Some(sub_start);
if let Some((mut subt, mut vidt)) = opts.autoscale {
debug!("Autoscale: VT {} -> ST {}", vidt, subt);
subt -= sub_start;
vidt -= sub_start + opts.shift;
if subt.0 <= 0f32 {
panic!("Error in autoscale, start time is negative or zero.");
}
if vidt.0 <= 0f32 {
panic!("Error in autoscale, end time is negative or zero.");
}
debug!(" relative to #{}, after \"move\": VT {} -> ST {}", num_orig, vidt, subt);
opts.scale = vidt.0 / subt.0;
debug!("Resolved scale as {}", opts.scale);
}
opts.durscale *= opts.scale;
debug!("Duration scaling is {}", opts.durscale);
}
if opts.scale != 1f32 {
subtitle.start = subtitle.start.scale(istate.start_time.unwrap(), opts.scale);
}
subtitle.dur *= opts.durscale;
subtitle.start += opts.shift;
for amove in opts.automove.iter_mut() {
match amove {
AutoMoveTag::ByIndex(idx, ref vidt) => {
if num_new >= *idx {
debug!("Move by new index starts, reached {}", idx);
let vidt = *vidt;
let dif = vidt - subtitle.start;
subtitle.start = vidt;
std::mem::replace(amove, AutoMoveTag::ByIndexRelative(num_new, dif));
} else if *vidt < subtitle.start && *idx > num_new {
// istate = istate_backup;
warn!("Skip overlapped #{} (by index)", num_orig);
continue;
}
}
AutoMoveTag::ByIndexOrig(idx, ref vidt) => {
if num_orig >= *idx {
debug!("Move by orig index starts, reached {}", idx);
let vidt = *vidt;
let dif = vidt - subtitle.start;
subtitle.start = vidt;
std::mem::replace(amove, AutoMoveTag::ByIndexRelative(num_new, dif));
} else if *vidt < sub_start && *idx > num_orig {
// istate = istate_backup;
warn!("Skip overlapped #{} (by orig index)", num_orig);
continue;
}
}
AutoMoveTag::ByTime(ref subt, ref vidt) => {
if subtitle.start >= *vidt {
// TODO verify
subtitle.start += *vidt - *subt;
} else if *vidt < subtitle.start && *subt > subtitle.start {
// istate = istate_backup;
warn!("Skip overlapped #{} (by time)", num_orig);
continue;
}
}
AutoMoveTag::ByTimeOrig(ref subt, ref vidt) => {
if sub_start >= *subt {
// TODO verify
subtitle.start += *vidt - *subt;
} else if *vidt < subtitle.start && *subt > sub_start {
// istate = istate_backup;
warn!("Skip overlapped #{} (by orig time)", num_orig);
continue;
}
},
// this is used internally
AutoMoveTag::ByIndexRelative(ref idx, ref dif) => {
if num_new >= *idx {
subtitle.start += *dif;
}
},
}
}
}
if subtitle.start.0 < 0f32 {
warn!("Discarding negative time entry #{} @ {:.3}s", subtitle.num, subtitle.start.0);
istate = istate_backup;
continue;
}
outfile.write(subtitle.to_string().as_bytes()).expect("failed to write");
}
}
Err(e) => {
error!("couldnt parse >{}<: {}", x, e);
for b in x.as_bytes() {
error!("{:#02x} - {}", b, b);
}
error!("\n");
}
}
}
outfile.flush().unwrap();
}
/// A position on the subtitle timeline, held as (possibly fractional) seconds.
#[derive(Copy, Clone, PartialEq, PartialOrd)]
struct SubInstant(f32);

impl Debug for SubInstant {
    /// Renders as `Time(<display form>)`.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        f.write_fmt(format_args!("Time({})", self))
    }
}
/// A span of time between two instants, held as (possibly fractional, possibly negative) seconds.
#[derive(Copy, Clone, PartialEq, PartialOrd)]
struct SubDuration(f32);

impl Debug for SubDuration {
    /// Renders as `Duration(<display form>)`.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        f.write_fmt(format_args!("Duration({})", self))
    }
}

impl Display for SubDuration {
    /// Uses the same textual form as an instant with the same number of seconds.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        let as_instant = SubInstant(self.0);
        f.write_fmt(format_args!("{}", as_instant))
    }
}
impl Add<SubDuration> for SubInstant {
type Output = SubInstant;
fn add(self, rhs: SubDuration) -> Self::Output {
SubInstant(self.0 + rhs.0)
}
}
impl Sub<SubDuration> for SubInstant {
type Output = SubInstant;
fn sub(self, rhs: SubDuration) -> Self::Output {
SubInstant(self.0 - rhs.0)
}
}
impl Sub<SubInstant> for SubInstant {
type Output = SubDuration;
fn sub(self, rhs: SubInstant) -> Self::Output {
SubDuration(self.0 - rhs.0)
}
}
impl Mul<f32> for SubDuration {
type Output = SubDuration;
fn mul(self, rhs: f32) -> Self::Output {
SubDuration(self.0 * rhs)
}
}
impl MulAssign<f32> for SubDuration {
fn mul_assign(&mut self, rhs: f32) {
self.0 *= rhs;
}
}
impl AddAssign<f32> for SubDuration {
fn add_assign(&mut self, rhs: f32) {
self.0 += rhs;
}
}
impl SubInstant {
/// Scale by a factor with a custom start time
pub fn scale(&self, start: SubInstant, factor: f32) -> SubInstant {
SubInstant(start.0 + (self.0 - start.0) * factor)
}
}
impl AddAssign<f32> for SubInstant {
fn add_assign(&mut self, rhs: f32) {
self.0 += rhs;
}
}
impl AddAssign<SubInstant> for SubInstant {
fn add_assign(&mut self, rhs: SubInstant) {
self.0 += rhs.0;
}
}
impl SubAssign<SubInstant> for SubInstant {
fn sub_assign(&mut self, rhs: SubInstant) {
self.0 -= rhs.0;
}
}
impl SubAssign<SubDuration> for SubInstant {
fn sub_assign(&mut self, rhs: SubDuration) {
self.0 -= rhs.0;
}
}
impl AddAssign<SubDuration> for SubInstant {
fn add_assign(&mut self, rhs: SubDuration) {
self.0 += rhs.0;
}
}
impl Display for SubInstant {
    /// Formats the instant as an SRT timestamp, `HH:MM:SS,mmm`, with a leading `-` for
    /// negative values (including negative zero).
    fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> {
        // Round to whole milliseconds once, then split with integer arithmetic. Rounding
        // only the fractional part could yield 1000 and print e.g. "00:00:01,1000" without
        // carrying into the seconds.
        let total_ms = (f64::from(self.0.abs()) * 1000f64).round() as u64;
        let msecs = total_ms % 1000;
        let total_secs = total_ms / 1000;
        let secs = total_secs % 60;
        let minutes = (total_secs / 60) % 60;
        let hours = total_secs / 3600;
        write!(
            f,
            "{}{:02}:{:02}:{:02},{:03}",
            if self.0.is_sign_negative() { "-" } else { "" },
            hours,
            minutes,
            secs,
            msecs
        )
    }
}
/// One subtitle entry as it will be written to the output.
#[derive(Clone, Debug)]
struct Subtitle {
// Entry number printed on the first line of the entry.
num: u32,
// Time at which the entry appears.
start: SubInstant,
// How long the entry stays visible; the end time is printed as `start + dur`.
dur: SubDuration,
// Text lines of the entry joined with '\n'.
text: String,
}
impl Display for Subtitle {
    /// Writes the entry in SRT layout: number, `start --> end`, text, then one blank line.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        let end = self.start + self.dur;
        writeln!(f, "{}", self.num)?;
        writeln!(f, "{} --> {}", self.start, end)?;
        writeln!(f, "{}", self.text)?;
        writeln!(f)
    }
}
impl TryFrom<&str> for SubInstant {
type Error = failure::Error;
fn try_from(value: &str) -> Result<Self, Self::Error> {
lazy_static! {
static ref DATE_RE: Regex = Regex::new(r"^(-)?(?P<h>\d+):(?P<m>\d+):(?P<s>\d+(:?[,.]\d+)?)$").unwrap();
}
match DATE_RE.captures(value) {
Some(caps) => {
let minus = if caps.get(1).is_some() { -1f32 } else { 1f32 };
let h = &caps["h"];
let m = &caps["m"];
let s = caps["s"].replace(",", ".");
Ok(SubInstant(minus * (f32::from_str(h).unwrap() * 3600f32 +
f32::from_str(m).unwrap() * 60f32 +
f32::from_str(&s).unwrap())))
}
None => Err(failure::format_err!("Error parsing time: {}", value))
}
}
}
impl SubDuration {
pub fn as_instant(&self) -> SubInstant {
SubInstant(self.0)
}
}
impl TryFrom<&str> for SubDuration {
type Error = failure::Error;
fn try_from(value: &str) -> Result<Self, Self::Error> {
lazy_static! {
static ref TIME_RE: Regex = Regex::new(r"^(?U)(?P<n>-)?(?:(?P<h>\d+):)?(?:(?P<m>\d+):)?(?P<s>\d+(?:[.,]\d+)?)$").unwrap();
}
match TIME_RE.captures(value) {
Some(caps) => {
let minus = if caps.name("n").is_some() { -1f32 } else { 1f32 };
let h = caps.name("h").map_or(0f32, |m| f32::from_str(m.as_str()).unwrap());
let m = caps.name("m").map_or(0f32, |m| f32::from_str(m.as_str()).unwrap());
let s = caps.name("s").map_or(0f32, |m| f32::from_str(&m.as_str().replace(",", ".")).unwrap());
Ok(SubDuration(minus * (h * 3600f32 +
m * 60f32 +
s)))
}
None => {
Err(failure::format_err!("Error parsing time: {}", value))
}
}
}
}
/// Command-line time parsing: seconds, M:S and H:M:S, with either decimal separator and
/// an optional minus. A failed parse shows up as the `bad` sentinel in the assertion.
#[test]
fn test_parse_duration() {
    // this is used for user input on the command line
    let bad = SubDuration(-1f32);
    assert_eq!(SubDuration(45678f32), SubDuration::try_from("45678").unwrap_or(bad), "integer secs");
    assert_eq!(SubDuration(1.23f32), SubDuration::try_from("1.23").unwrap_or(bad), "float secs with period");
    assert_eq!(SubDuration(-1.23f32), SubDuration::try_from("-1.23").unwrap_or(bad), "MINUS float secs with period");
    assert_eq!(SubDuration(1.23f32), SubDuration::try_from("1,23").unwrap_or(bad), "float secs with comma");
    assert_eq!(SubDuration(121.15f32), SubDuration::try_from("2:1.15").unwrap_or(bad), "m:s.frac");
    assert_eq!(SubDuration(121.15f32), SubDuration::try_from("2:01.15").unwrap_or(bad), "m:0s.frac");
    assert_eq!(SubDuration(121.15f32), SubDuration::try_from("02:01.15").unwrap_or(bad), "0m:0s.frac");
    assert_eq!(SubDuration(121.15f32), SubDuration::try_from("02:01,15").unwrap_or(bad), "0m:0s,frac");
    assert_eq!(SubDuration(3721.15f32), SubDuration::try_from("1:02:01,15").unwrap_or(bad), "h:0m:0s,frac");
    // period variant; this case used to repeat the comma input above
    assert_eq!(SubDuration(3721.15f32), SubDuration::try_from("1:02:01.15").unwrap_or(bad), "h:0m:0s.frac");
    assert_eq!(SubDuration(3721.15f32), SubDuration::try_from("01:02:01,15").unwrap_or(bad), "0h:0m:0s,frac");
    assert_eq!(SubDuration(-3721.15f32), SubDuration::try_from("-01:02:01,15").unwrap_or(bad), "-0h:0m:0s,frac");
}
/// Timestamp parsing as used for the timing lines of a .srt file. A failed parse shows up
/// as the `bad` sentinel in the assertion.
#[test]
fn test_parse_instant() {
    let bad = SubInstant(-1f32);
    // (expected seconds, input text)
    let cases = [
        (1081.755f32, "00:18:01,755"),
        (1081.755f32, "00:18:01.755"),
        (1081.7f32, "00:18:01.7"),
        (1081.7f32, "0:18:1.7"),
        (0f32, "00:00:00,000"),
        (-3600f32, "-01:00:00,000"),
    ];
    for (expected, input) in cases.iter() {
        assert_eq!(SubInstant(*expected), SubInstant::try_from(*input).unwrap_or(bad));
    }
}
/// Parsing a timestamp and printing it again yields the canonical `HH:MM:SS,mmm` form,
/// including the sign of a negative zero.
#[test]
fn test_stringify_instant() {
    // (expected output, input text)
    let cases = [
        ("00:18:01,755", "00:18:01,755"),
        ("-00:18:01,755", "-00:18:01,755"),
        ("-00:18:01,700", "-00:18:01.7"),
        ("00:00:00,000", "00:00:00,000"),
        ("-00:00:00,000", "-00:00:00,000"),
    ];
    for (expected, input) in cases.iter() {
        assert_eq!(*expected, SubInstant::try_from(*input).unwrap().to_string());
    }
}