From 803f462592affac537c003df7937d30c805053b3 Mon Sep 17 00:00:00 2001 From: Pekka Helenius Date: Wed, 2 Nov 2022 23:09:00 +0200 Subject: [PATCH] getsource: fix file URL pattern match in HTML data --- tools/getsource.sh | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/tools/getsource.sh b/tools/getsource.sh index bd4737a..289d935 100755 --- a/tools/getsource.sh +++ b/tools/getsource.sh @@ -117,11 +117,13 @@ function fetch_database() { for GITBASE in ${GITBASES[@]}; do BASEURL="${DOMAINURL}/${GITBASE}/tree/packages/${PACKAGE}/trunk" + BASEURL_RELATIVE_FILES="/${GITBASE}/blob/packages/${PACKAGE}/trunk" if get_url "${BASEURL}" "${URLFILE}"; then + FILENAMES=() + FILEHREFS=( $(grep -oP "(?=${BASEURL_RELATIVE_FILES}).*?(?=\"\>)" "${URLFILE}" | sed 's/blob//') ) - FILEHREFS=( $(grep -oP '(?<=data-pjax).*?(?=\<\/a)' "${URLFILE}" | sed -r "s/.*href=[\"|'](.*)[\"|']>.*/\1/; s/\/blob//g" | grep trunk) ) for i in ${FILEHREFS[@]}; do FILENAMES+=( $(echo "${i}" | sed 's/.*\///g') ) done @@ -155,11 +157,13 @@ function fetch_database() { for GITBASE in ${GITBASES[@]}; do for DATABASE in ${DATABASES[@]}; do BASEURL="${DOMAINURL}/${GITBASE}/tree/master/${DATABASE}/${PACKAGE}" + BASEURL_RELATIVE_FILES="/${GITBASE}/blob/master/${DATABASE}/${PACKAGE}" if get_url "${BASEURL}" "${URLFILE}"; then + FILENAMES=() + FILEHREFS=( $(grep -oP "(?=${BASEURL_RELATIVE_FILES}).*?(?=\"\>)" "${URLFILE}" | sed 's/blob//') ) - FILEHREFS=( $(grep -oP '(?<=data-pjax).*?(?=\<\/a)' "${URLFILE}" | sed -r "s/.*href=[\"|'](.*)[\"|']>.*/\1/; s/\/blob//g" | grep ${PACKAGE}) ) for i in ${FILEHREFS[@]}; do FILENAMES+=( $(echo "${i}" | sed 's/.*\///g') ) done