Semi-complete Podcast Downloader Script

I’m sure I’ll be modifying this more over time, but for now it reads from a text file (podcasts.txt) in the given DOWNLOAD_PATH; this file contains the RSS URL(s) for the podcast(s). It organizes the podcasts into directories, creating them if they don’t already exist, and it uses eyeD3 to add metadata (including cover art). It works fairly well, though I’m still stomping bugs.

Maybe next I’ll put together a nice little frontend for it—maybe even convert the whole thing to a Blazor app. But for now, it does the thing, and it’s fun being a script kiddie 🙂

#!/bin/bash

# Ensure the download directory for one podcast exists.
# Globals:   DOWNLOAD_PATH (read), TITLE_PATH (written: "${DOWNLOAD_PATH}/$1")
# Arguments: $1 - directory name, relative to DOWNLOAD_PATH
# Returns:   0 if the directory exists afterwards, 1 if it could not be created
create_path() {
    TITLE_PATH="${DOWNLOAD_PATH}/${1}"

    if [[ ! -d "${TITLE_PATH}" ]]; then
        # -p creates missing parent directories too; a plain mkdir fails when
        # DOWNLOAD_PATH itself does not exist yet.
        if ! mkdir -p -- "${TITLE_PATH}"; then
            echo "Error: could not create directory ${TITLE_PATH}" >&2
            return 1
        fi
    fi
}

# Replace unfriendly filename characters with underscores.
# Arguments: $1 - candidate file name
# Outputs:   the name with each of / : * ? " < > | replaced by "_", on stdout
sanitize_filename() {
    local filename="$1"
    # Held in a variable so it is used as a glob bracket expression without
    # any escaping inside the expansion below.
    local unfriendly='[/:*?"<>|]'
    local sanitized=${filename//$unfriendly/_}

    # printf rather than echo: echo would treat a name such as "-n" as an option.
    printf '%s\n' "$sanitized"
}

# Download every episode of one podcast feed that is not on disk yet.
# Globals:   TITLE_PATH (set by create_path, read here, unset on completion)
# Arguments: $1 - RSS feed URL
# Files:     writes /tmp/rss_feed.xml; reads /tmp/cover.jpg when it exists
# Outputs:   one "Downloading: ..." line per fetched episode on stdout,
#            warnings and errors on stderr
# Returns:   0 normally, 1 if the feed or the target directory is unavailable
dl_podcast() {
    local RSS_FEED_URL=$1
    local feed_file=/tmp/rss_feed.xml
    local cover_file=/tmp/cover.jpg
    local cdata_open='<![CDATA[' cdata_close=']]>'
    local title ep_title raw_url url filenum file dest partial
    local i j padtowidth
    local -a titles=() urls=()

    if ! curl -fsSL "$RSS_FEED_URL" > "$feed_file"; then
        echo "Error: could not download feed ${RSS_FEED_URL}" >&2
        return 1
    fi

    title=$(get_podcast_title "${RSS_FEED_URL}")
    if [[ -z "$title" ]]; then
        # Not fatal: with an empty title the episodes land directly in the
        # download root, as they always have.
        echo "Warning: no podcast title found for ${RSS_FEED_URL}" >&2
    fi

    create_path "${title}" || return 1

    # Extract episode titles and enclosure URLs.
    mapfile -t titles < <(xmllint --xpath '//item/title/text()' "$feed_file")
    # grep -o puts each url="..." attribute on its own line, in case xmllint
    # prints several attribute nodes on one line. sed decodes the XML-escaped
    # "&amp;" that would otherwise be sent to the server literally.
    mapfile -t urls < <(xmllint --xpath '//item/enclosure/@url' "$feed_file" \
        | grep -o 'url="[^"]*"' \
        | sed 's/&amp;/\&/g')

    # Titles and URLs are paired by position, so a count mismatch means some
    # episodes may get the wrong file.
    if (( ${#titles[@]} != ${#urls[@]} )); then
        echo "Warning: ${#titles[@]} titles but ${#urls[@]} enclosure URLs in ${RSS_FEED_URL}; episodes may be mismatched" >&2
    fi

    # Episodes are numbered from the item count down to 1, zero-padded to the
    # width of the largest number.
    j=${#titles[@]}
    padtowidth=${#j}
    for ((i = 0; i < ${#titles[@]}; i++)); do
        # Strip one CDATA wrapper from the title, if present.
        ep_title=${titles[i]/"$cdata_open"/}
        ep_title=${ep_title/"$cdata_close"/}

        # Keep only what is between url=" and the closing quote. Removing
        # every "url=" would also damage query strings that contain it.
        raw_url=${urls[i]:-}
        url=${raw_url#*'url="'}
        url=${url%'"'*}

        printf -v filenum '%0*d' "$padtowidth" "$j"
        file=$(sanitize_filename "${filenum} ${ep_title}.mp3")
        dest="${TITLE_PATH}/${file}"

        if [[ -z "$url" ]]; then
            echo "Warning: no enclosure URL for ${file}; skipping" >&2
        elif [[ ! -f "$dest" ]]; then
            echo "Downloading: ${file} (${url})"
            # wget -O creates its output file even when the transfer fails.
            # Download to a side file so a failed episode is retried next run
            # instead of being mistaken for a finished one.
            partial="${dest}.part"
            if wget -qO "$partial" "$url" && mv -- "$partial" "$dest"; then
                /usr/bin/eyeD3 --track "${filenum}" --disc-num "1" "$dest"
                if [[ -f "$cover_file" ]]; then
                    /usr/bin/eyeD3 --add-image="${cover_file}":FRONT_COVER "$dest"
                fi
            else
                rm -f -- "$partial"
                echo "Warning: download failed: ${url}" >&2
            fi
        fi
        j=$((j - 1))
    done

    # Unset the variable itself, not the path it holds.
    unset TITLE_PATH
}

# Print the podcast title found in an RSS feed.
# The title is the text of the SECOND <title>...</title> match in the
# downloaded feed; a <title> whose content starts with "<" (for example a
# CDATA section) is not matched by the pattern.
# Arguments: $1 - RSS feed URL
# Outputs:   the title on stdout (nothing when none is found)
# Returns:   0 when a title was found, 1 otherwise
get_podcast_title() {
    local url="${1}"
    local title

    # Assigned separately from "local" so a failure is not hidden behind the
    # exit status of "local". -f: no output on HTTP errors, so an error page's
    # <title> is never parsed; -L: follow feed redirects.
    title="$(curl -fsSL "$url" | grep -oP '<title>\K[^<]+' | sed -n '2p')"

    if [[ -z "$title" ]]; then
        echo "Error: no podcast title found in ${url}" >&2
        return 1
    fi

    printf '%s\n' "${title}"
}

# Download the cover image of a podcast feed to /tmp/cover.jpg.
# The cover URL is the href of the first <itunes:image ...> match in the feed.
# Arguments: $1 - RSS feed URL
# Files:     /tmp/cover.jpg (removed first, then written on success)
# Returns:   0 when the cover was downloaded, 1 when the feed has no cover or
#            the download failed
get_podcast_cover() {
    local url=$1
    local cover_url

    # Remove any cover left over from a previously processed feed, so a feed
    # without artwork is never tagged with another podcast's image.
    rm -f /tmp/cover.jpg

    # The href is raw XML text, so decode the escaped "&amp;" before use.
    cover_url=$(curl -fsSL "$url" \
        | grep -oP '<itunes:image\s+href="\K[^"]+' \
        | head -n 1 \
        | sed 's/&amp;/\&/g')

    if [[ -z "$cover_url" ]]; then
        echo "Warning: no <itunes:image> cover found in ${url}" >&2
        return 1
    fi

    if ! curl -fsSL -o /tmp/cover.jpg "$cover_url"; then
        rm -f /tmp/cover.jpg
        echo "Warning: could not download cover ${cover_url}" >&2
        return 1
    fi
}

# Remove the temporary cover image and feed copy left in /tmp.
# Returns: rm's exit status; files that are already missing are not an error
cleanup() {
    # -f: either file may never have been created (feed without artwork, no
    # feed processed at all), and that must not produce an error.
    rm -f -- /tmp/cover.jpg /tmp/rss_feed.xml
}

# Read a text file and print its non-empty lines joined by single spaces.
# Arguments: $1 - path of the file to read
# Outputs:   all non-empty lines on one stdout line, space-separated;
#            the error message goes to stderr
# Exits:     with status 1 when the file does not exist. Inside a command
#            substitution this ends only the subshell, so callers must check
#            the substitution's status.
read_lines_into_array() {
    local file=$1
    local line
    local -a raw_lines=() lines_array=()

    # Check if file exists. The message goes to stderr so a caller capturing
    # stdout never mistakes it for file content.
    if [[ ! -f "$file" ]]; then
        echo "Error: File $file not found." >&2
        exit 1
    fi

    mapfile -t raw_lines < "${file}"

    for line in "${raw_lines[@]}"; do
        # Drop the carriage return of CRLF line endings, then skip blank lines.
        line=${line%$'\r'}
        if [[ -n "$line" ]]; then
            lines_array+=("$line")
        fi
    done

    # "${array[*]}" joins on the first character of IFS, pinned to a space.
    local IFS=' '
    printf '%s\n' "${lines_array[*]}"
}

# Fetch the cover and download the episodes of every listed podcast feed.
# Arguments: $1 - NAME of an array variable holding RSS feed URLs. An element
#                 may hold several whitespace-separated URLs; each one is
#                 processed separately.
process_podcasts() {
    local -n _podcast_feeds=$1
    local entry link
    local -a feed_urls

    for entry in "${_podcast_feeds[@]}"; do
        # Split on whitespace without glob expansion (URLs often contain "?").
        # read returns non-zero at end of input even though it filled the array.
        feed_urls=()
        IFS=$' \t\n' read -r -d '' -a feed_urls <<< "$entry" || true

        for link in "${feed_urls[@]}"; do
            # The cover is fetched first; dl_podcast looks up the title itself.
            get_podcast_cover "$link"
            dl_podcast "$link"
        done
    done
}

# Entry point: download every podcast listed in a text file.
# Globals:   DOWNLOAD_PATH (written; a value already set in the environment
#            is kept, otherwise the placeholder below applies), TITLE_PATH
# Arguments: $1 - text file with one RSS feed URL per line
# Exits:     1 on wrong usage, missing list file, missing download directory
#            or an empty list
main() {
    DOWNLOAD_PATH="${DOWNLOAD_PATH:-<PODCAST DIRECTORY GOES HERE>}"
    TITLE_PATH=${DOWNLOAD_PATH}

    if [[ $# -ne 1 ]]; then
        # The argument is the list file, not a feed URL.
        echo "Usage: $0 <file listing podcast RSS URLs>" >&2
        exit 1
    fi

    local file=$1
    local feed_list
    local -a links=()

    if [[ ! -f "$file" ]]; then
        echo "Error: File $file not found." >&2
        exit 1
    fi

    if [[ ! -d "$DOWNLOAD_PATH" ]]; then
        echo "Error: download directory ${DOWNLOAD_PATH} does not exist; set DOWNLOAD_PATH." >&2
        exit 1
    fi

    # An exit inside the helper ends only the command-substitution subshell,
    # so its status has to be checked here.
    feed_list=$(read_lines_into_array "$file") || exit 1

    # Split into one array element per URL. read returns non-zero at end of
    # input even though the array has been filled.
    IFS=$' \t\n' read -r -d '' -a links <<< "$feed_list" || true

    if (( ${#links[@]} == 0 )); then
        echo "Error: no podcast URLs found in ${file}" >&2
        exit 1
    fi

    process_podcasts links
    cleanup
}

# Script entry point: pass all command-line arguments through to main.
main "$@"

Leave a Reply

This site uses Akismet to reduce spam. Learn how your comment data is processed.