#!/bin/bash
#
# Make file links for upload to the UCSC Genome Browser.
# Run this script on the cluster.
#
# Downloads the extshare directory listing at $url, extracts the href targets,
# prefixes each one with $url, and writes the resulting absolute URLs (one per
# line) to links.txt in the matching directory under $out_root.
#
# Exits non-zero, without touching links.txt, if the download fails, no links
# are found, or the output directory is missing.

set -euo pipefail

# JUST UPDATE THIS PATH
url="https://extshare.salk.edu/iblm_ucsc/mlorenzini/240528_t7_read_counts_chrom_v1/"

# Web prefix that is stripped from $url to get the path relative to $out_root.
readonly url_root="https://extshare.salk.edu/iblm_ucsc/"
# Directory on the cluster under which links.txt is written.
readonly out_root="/iblm/netapp/data4/UCSC_browser_lab_data"

# Print an error message to stderr and abort the script.
die() {
  printf 'Error: %s\n' "$*" >&2
  exit 1
}

# Make sure the URL ends with a slash: both the generated links and the output
# path are built by plain concatenation and break without it.
url="${url%/}/"

# Without the expected prefix the output path below would embed the whole URL.
[[ "$url" == "$url_root"* ]] || die "url must start with ${url_root}: ${url}"

out_file="${out_root}/${url#"$url_root"}links.txt"
out_dir="${out_file%/*}"
[[ -d "$out_dir" ]] || die "output directory does not exist: ${out_dir}"

date
echo "Making file links..."

# Download extshare html content.
# -f makes curl fail on HTTP errors instead of handing an error page to grep;
# -S still prints the reason despite -s.
html=$(curl -fsS "$url") || die "failed to download directory listing: ${url}"

# Extract the URLs from links: keep every href value, drop absolute paths
# (leading "/") and .txt files, then skip the first four remaining entries
# (presumably the listing's column-sort links -- confirm against the page).
# grep exits 1 when nothing matches, which would trip errexit/pipefail; the
# empty result is reported explicitly right below instead.
links=$(
  printf '%s\n' "$html" \
    | grep -Po '(?<=href=")[^"]*(?=")' \
    | grep -vE '^/|\.txt$' \
    | tail -n +5
) || true
[[ -n "$links" ]] || die "no file links found at ${url}"

# Append the URL prefix to each line and write the modified URLs.
printf '%s\n' "$links" | awk -v prefix="$url" '{ print prefix $0 }' > "$out_file"

date
echo "Done"
