Files
yt-dlp-webui/server/internal/process.go
2025-01-01 09:38:59 +01:00

382 lines
8.3 KiB
Go

package internal
import (
"bufio"
"bytes"
"context"
"encoding/json"
"errors"
"fmt"
"io"
"log/slog"
"regexp"
"slices"
"syscall"
"os"
"os/exec"
"strings"
"time"
"github.com/marcopiovanello/yt-dlp-web-ui/v3/server/archiver"
"github.com/marcopiovanello/yt-dlp-web-ui/v3/server/config"
)
// downloadTemplate is the yt-dlp --progress-template used for download
// progress. Start removes every newline, tab and space from it before handing
// it to yt-dlp, so each update is printed as one JSON-shaped line that
// parseLogEntry then tries to decode.
// eta and speed are emitted unquoted; lines that do not decode as JSON are
// skipped by parseLogEntry.
const downloadTemplate = `download:
{
"eta":%(progress.eta)s,
"percentage":"%(progress._percent_str)s",
"speed":%(progress.speed)s
}`
// postprocessTemplate is the yt-dlp --progress-template used for the
// post-processing phase. It reports info.filepath, which parseLogEntry stores
// in Output.SavedFilePath.
// Original author's note: the "filename" field does not carry the correct
// extension after post-processing, hence the use of filepath here.
const postprocessTemplate = `postprocess:
{
"filepath":"%(info.filepath)s"
}
`
// Download lifecycle states stored in DownloadProgress.Status.
const (
// StatusPending is set by SetPending and SetMetadata.
StatusPending = iota
// StatusDownloading is set by parseLogEntry whenever a progress line decodes.
StatusDownloading
// StatusCompleted is set by Complete and by Kill.
StatusCompleted
// StatusErrored is not assigned anywhere in the visible part of this file.
StatusErrored
)
// Process describes a single yt-dlp download and the state tracked for it.
type Process struct {
// Id identifies the process (a UUIDv4 according to the Start documentation);
// getShortId logs only the part before the first "-".
Id string
// Url is the resource passed to yt-dlp.
Url string
// Livestream is not read in the visible part of this file.
// NOTE(review): presumably marks livestream downloads; confirm against callers.
Livestream bool
// Params holds extra yt-dlp arguments; Start filters and extends this slice.
Params []string
// Info is the metadata filled by SetPending and SetMetadata.
Info DownloadInfo
// Progress is the latest progress snapshot, updated by parseLogEntry,
// Complete and Kill.
Progress DownloadProgress
// Output carries the requested path/filename and, once known, the saved file path.
Output DownloadOutput
// proc is the OS process handle set by Start and used by Kill.
proc *os.Process
}
// Start spawns a new yt-dlp process for p.Url and blocks until its output has
// been fully consumed and the process has exited.
//
// yt-dlp is asked to print its progress with custom templates that resemble
// JSON objects, so that every stdout line can be unmarshalled later on.
// The approach is not perfect: quotes inside values are not escaped properly.
// Each process is not identified by its PID but by a UUIDv4.
//
// Start panics when the stdout/stderr pipes cannot be created or the process
// cannot be started. Once the process has been started, p.Complete is always
// called before Start returns.
func (p *Process) Start() {
	// Drop empty arguments and anything that looks like shell variable
	// expansion or command chaining, you'll never know what they might come
	// with... The pattern is compiled once instead of once per argument.
	unsafeParam := regexp.MustCompile(`(\$\{)|(\&\&)`)
	p.Params = slices.DeleteFunc(p.Params, func(e string) bool {
		return e == "" || unsafeParam.MatchString(e)
	})

	// Normalise the requested filename BEFORE it is copied into out, so the
	// template handed to yt-dlp is the normalised one. Previously the copy was
	// taken first and the normalisation never reached the command line.
	buildFilename(&p.Output)

	out := DownloadOutput{
		Path:     config.Instance().DownloadPath,
		Filename: "%(title)s.%(ext)s",
	}
	if p.Output.Path != "" {
		out.Path = p.Output.Path
	}
	if p.Output.Filename != "" {
		out.Filename = p.Output.Filename
	}

	// The templates are written on several lines for readability; yt-dlp
	// receives them with all newlines, tabs and spaces removed.
	templateReplacer := strings.NewReplacer("\n", "", "\t", "", " ", "")

	baseParams := []string{
		strings.Split(p.Url, "?list")[0], //no playlist
		"--newline",
		"--no-colors",
		"--no-playlist",
		"--progress-template",
		templateReplacer.Replace(downloadTemplate),
		"--progress-template",
		templateReplacer.Replace(postprocessTemplate),
	}

	// Only add our own output template when the user did not manually
	// override the output path.
	if !(slices.Contains(p.Params, "-P") || slices.Contains(p.Params, "--paths")) {
		p.Params = append(p.Params, "-o", fmt.Sprintf("%s/%s", out.Path, out.Filename))
	}

	params := append(baseParams, p.Params...)

	slog.Info("requesting download", slog.String("url", p.Url), slog.Any("params", params))

	cmd := exec.Command(config.Instance().DownloaderPath, params...)
	// Own process group, so Kill can signal yt-dlp and all of its children.
	cmd.SysProcAttr = &syscall.SysProcAttr{Setpgid: true}

	stdout, err := cmd.StdoutPipe()
	if err != nil {
		slog.Error("failed to get a stdout pipe", slog.Any("err", err))
		panic(err)
	}

	stderr, err := cmd.StderrPipe()
	if err != nil {
		slog.Error("failed to get a stderr pipe", slog.Any("err", err))
		panic(err)
	}

	if err := cmd.Start(); err != nil {
		slog.Error("failed to start yt-dlp process", slog.Any("err", err))
		panic(err)
	}

	p.proc = cmd.Process

	ctx, cancel := context.WithCancel(context.Background())
	defer cancel()
	// Deferred after cancel so that, as before, the process is marked as
	// complete first and the context is cancelled last.
	defer p.Complete()

	logs := make(chan []byte)
	stdoutDone := make(chan struct{})
	stderrDone := make(chan struct{})

	produceLogs(stdout, logs)
	go func() {
		defer close(stdoutDone)
		p.consumeLogs(ctx, logs)
	}()
	go func() {
		defer close(stderrDone)
		p.detectYtDlpErrors(stderr)
	}()

	// os/exec requires every read from the pipes to finish before Wait is
	// called: Wait closes the pipes, so calling it earlier can drop the last
	// lines of output. Both readers stop once their pipe reaches EOF, which
	// happens when every process holding the write end has exited.
	<-stdoutDone
	<-stderrDone

	// Wait also closes the pipes, no explicit stdout.Close is needed.
	if err := cmd.Wait(); err != nil {
		slog.Error("yt-dlp process exited with an error",
			slog.String("id", p.getShortId()),
			slog.String("url", p.Url),
			slog.Any("err", err),
		)
	}
}

// produceLogs reads r line by line in a background goroutine and sends every
// line on logs; it returns immediately.
//
// Each line is copied before being sent because the slice returned by
// scanner.Bytes is only valid until the next call to Scan. logs is closed once
// the scanner stops, so the receiver can tell that no more lines will arrive.
func produceLogs(r io.Reader, logs chan<- []byte) {
	go func() {
		defer close(logs)

		scanner := bufio.NewScanner(r)
		for scanner.Scan() {
			logs <- bytes.Clone(scanner.Bytes())
		}
	}()
}

// consumeLogs feeds every entry received on logs to parseLogEntry. It returns
// when logs is closed or when ctx is cancelled, whichever happens first.
func (p *Process) consumeLogs(ctx context.Context, logs <-chan []byte) {
	detach := func() {
		slog.Info("detaching from yt-dlp stdout",
			slog.String("id", p.getShortId()),
			slog.String("url", p.Url),
		)
	}

	for {
		select {
		case <-ctx.Done():
			detach()
			return
		case entry, ok := <-logs:
			if !ok {
				// Producer is done: without this check a closed channel
				// would make the loop spin on zero values.
				detach()
				return
			}
			p.parseLogEntry(entry)
		}
	}
}
// parseLogEntry tries to decode one stdout line both as a download progress
// entry and as a postprocess entry, and updates p accordingly. Lines that are
// not valid JSON are ignored.
//
// encoding/json ignores unknown object keys, so a JSON object line normally
// decodes successfully into BOTH templates. For that reason the saved file
// path is only updated when the entry actually carries one; otherwise every
// progress line would reset it to the empty string.
//
// NOTE(review): for the same reason a postprocess line also replaces
// p.Progress with zero values. That is left untouched because Complete's
// auto-archive condition (empty Percentage, zero Speed) appears to depend on
// it; confirm before changing.
func (p *Process) parseLogEntry(entry []byte) {
	var progress ProgressTemplate
	if err := json.Unmarshal(entry, &progress); err == nil {
		p.Progress = DownloadProgress{
			Status:     StatusDownloading,
			Percentage: progress.Percentage,
			Speed:      progress.Speed,
			ETA:        progress.Eta,
		}

		slog.Info("progress",
			slog.String("id", p.getShortId()),
			slog.String("url", p.Url),
			slog.String("percentage", progress.Percentage),
		)
	}

	var postprocess PostprocessTemplate
	if err := json.Unmarshal(entry, &postprocess); err == nil && postprocess.FilePath != "" {
		p.Output.SavedFilePath = postprocess.FilePath
	}
}
// detectYtDlpErrors logs every line read from r (yt-dlp's stderr) as an error
// and returns once r is exhausted.
//
// If the scanner stops early (for instance on a line longer than its maximum
// token size) the failure is logged and the rest of r is discarded, so the
// child process can never block on a stderr pipe nobody reads anymore.
func (p *Process) detectYtDlpErrors(r io.Reader) {
	scanner := bufio.NewScanner(r)

	for scanner.Scan() {
		slog.Error("yt-dlp process error",
			slog.String("id", p.getShortId()),
			slog.String("url", p.Url),
			slog.String("err", scanner.Text()),
		)
	}

	err := scanner.Err()
	if err == nil {
		return
	}

	// A pipe closed underneath the reader is not worth reporting.
	if !errors.Is(err, os.ErrClosed) {
		slog.Warn("stopped parsing yt-dlp stderr",
			slog.String("id", p.getShortId()),
			slog.String("url", p.Url),
			slog.Any("err", err),
		)
	}

	// Best effort: keep draining, the content itself is no longer needed.
	_, _ = io.Copy(io.Discard, r)
}
// Complete marks the process as completed; it does not remove it.
// Convention: all completed processes have percentage "-1" and speed 0 bps.
//
// Before the progress is reset, the download is published to the archiver
// when the current progress has an empty percentage and zero speed. When no
// saved file path is known yet, it is resolved through GetFileName.
// Failures while serializing metadata or resolving the filename are logged
// and do not prevent the process from being marked as completed.
func (p *Process) Complete() {
	// auto archive
	// TODO: it's not that deterministic :/
	if p.Progress.Percentage == "" && p.Progress.Speed == 0 {
		var serializedMetadata bytes.Buffer
		if err := json.NewEncoder(&serializedMetadata).Encode(p.Info); err != nil {
			// Still publish: the archive entry is useful without metadata.
			slog.Error("failed to serialize metadata for archiving",
				slog.String("id", p.getShortId()),
				slog.String("url", p.Url),
				slog.Any("err", err),
			)
		}

		archiver.Publish(&archiver.Message{
			Id:        p.Id,
			Path:      p.Output.SavedFilePath,
			Title:     p.Info.Title,
			Thumbnail: p.Info.Thumbnail,
			Source:    p.Url,
			Metadata:  serializedMetadata.String(),
			CreatedAt: p.Info.CreatedAt,
		})
	}

	p.Progress = DownloadProgress{
		Status:     StatusCompleted,
		Percentage: "-1",
		Speed:      0,
		ETA:        0,
	}

	// for safety, if the filename is not set, set it with original function
	if p.Output.SavedFilePath == "" {
		if err := p.GetFileName(&p.Output); err != nil {
			slog.Error("failed to resolve output filename",
				slog.String("id", p.getShortId()),
				slog.String("url", p.Url),
				slog.Any("err", err),
			)
		}
	}

	slog.Info("finished",
		slog.String("id", p.getShortId()),
		slog.String("url", p.Url),
	)
}
// Kill sends SIGTERM to the process group of the spawned yt-dlp process.
//
// yt-dlp uses multiple child processes and the parent has been spawned with
// Setpgid = true, so the signal has to target the whole process group to
// reach all of them.
// The status is set to StatusCompleted when Kill returns, whether or not the
// signal could be delivered. An error is returned when the process was never
// started, when its process group cannot be determined or when signalling
// fails.
func (p *Process) Kill() error {
	defer func() { p.Progress.Status = StatusCompleted }()

	if p.proc == nil {
		return errors.New("*os.Process not set")
	}

	group, err := syscall.Getpgid(p.proc.Pid)
	if err != nil {
		return err
	}

	// A negative pid addresses every process in the group.
	return syscall.Kill(-group, syscall.SIGTERM)
}
// GetFileName asks yt-dlp which filename it would produce for p.Url with the
// output template built from o.Path and o.Filename, and stores the answer
// (without surrounding newlines) in p.Output.SavedFilePath.
// It returns the error reported while running yt-dlp, if any.
func (p *Process) GetFileName(o *DownloadOutput) error {
	outputTemplate := fmt.Sprintf("%s/%s", o.Path, o.Filename)
	args := []string{
		"--print", "filename",
		"-o", outputTemplate,
		p.Url,
	}

	cmd := exec.Command(config.Instance().DownloaderPath, args...)
	cmd.SysProcAttr = &syscall.SysProcAttr{Setpgid: true}

	printed, err := cmd.Output()
	if err != nil {
		return err
	}

	p.Output.SavedFilePath = strings.Trim(string(printed), "\n")
	return nil
}
// SetPending replaces p.Info with placeholder metadata and marks the process
// as pending. The URL doubles as the title because the real title of the
// video is not available yet.
func (p *Process) SetPending() {
	placeholder := DownloadInfo{CreatedAt: time.Now()}
	placeholder.URL, placeholder.Title = p.Url, p.Url

	p.Info = placeholder
	p.Progress.Status = StatusPending
}
// SetMetadata runs yt-dlp with -J for p.Url, decodes the JSON document it
// prints into p.Info and marks the process as pending.
//
// The child process is always waited for, also when decoding fails, so it is
// never left unreaped. When yt-dlp exits with an error, the returned error
// carries what it wrote to stderr (or the exit error itself when stderr is
// empty); otherwise a decoding error, if any, is returned. p.Info is only
// replaced when decoding succeeded.
func (p *Process) SetMetadata() error {
	cmd := exec.Command(config.Instance().DownloaderPath, p.Url, "-J")
	cmd.SysProcAttr = &syscall.SysProcAttr{Setpgid: true}

	stdout, err := cmd.StdoutPipe()
	if err != nil {
		slog.Error("failed to connect to stdout",
			slog.String("id", p.getShortId()),
			slog.String("url", p.Url),
			slog.String("err", err.Error()),
		)
		return err
	}

	// Let os/exec copy stderr into the buffer: Wait only returns after that
	// copy has finished, so the buffer can be read safely afterwards.
	var bufferedStderr bytes.Buffer
	cmd.Stderr = &bufferedStderr

	info := DownloadInfo{
		URL:       p.Url,
		CreatedAt: time.Now(),
	}

	if err := cmd.Start(); err != nil {
		return err
	}

	slog.Info("retrieving metadata",
		slog.String("id", p.getShortId()),
		slog.String("url", p.Url),
	)

	decodeErr := json.NewDecoder(stdout).Decode(&info)
	if decodeErr == nil {
		p.Info = info
		p.Progress.Status = StatusPending
	}

	// Drain what is left on stdout: the child must not block on a full pipe
	// and Wait must only be called once all reads have completed.
	_, _ = io.Copy(io.Discard, stdout)

	if err := cmd.Wait(); err != nil {
		if msg := bufferedStderr.String(); msg != "" {
			return errors.New(msg)
		}
		return err
	}

	return decodeErr
}
// getShortId returns the part of p.Id that precedes the first "-", or the
// whole Id when it contains no "-". It is used to keep log lines short.
func (p *Process) getShortId() string {
	short, _, _ := strings.Cut(p.Id, "-")
	return short
}
// buildFilename makes sure a non-empty o.Filename ends with the yt-dlp
// extension placeholder ".%(ext)s" exactly once: the placeholder is appended
// when the name does not end with it, and a doubled placeholder is collapsed.
// An empty filename is left untouched so callers can fall back to a default.
//
// The previous condition was inverted: the placeholder was appended only when
// it was already present, so names lacking it never received one.
func buildFilename(o *DownloadOutput) {
	const extTemplate = ".%(ext)s"

	if o.Filename == "" {
		return
	}

	if !strings.HasSuffix(o.Filename, extTemplate) {
		o.Filename += extTemplate
	}

	o.Filename = strings.Replace(
		o.Filename,
		extTemplate+extTemplate,
		extTemplate,
		1,
	)
}