From d0269672ec21fcd0530576a992f498e56d59afe4 Mon Sep 17 00:00:00 2001 From: cmdln Date: Sat, 16 Oct 2010 11:34:10 -0400 Subject: [PATCH] Fixed NS issue. Alt xsl for notes without offsets. Added file handling steps. --- README | 3 ++- outline.bash | 26 ++++++++++++++++++++++---- outline.xsl => with_offset.xsl | 2 -- 3 files changed, 24 insertions(+), 7 deletions(-) rename outline.xsl => with_offset.xsl (93%) diff --git a/README b/README index 2d9159d..43eb483 100644 --- a/README +++ b/README @@ -5,5 +5,6 @@ encode.bash - Drives a set of encoders and tagging utilities to convert a single relink.py - A script intended for one time use to tweak a feed to re-link its enclosures to appropriate URLs at the Internet Archive. tidyrc - Tidy config that approximates the formatting of the live feeds to minimize disruption. publish.bash - Script to automate as much of the publishing step as possible. -outline.xsl - Transform that handles the recursive structure of OmniOutliner files better than Beautiful Soup does. +with_offset.xsl - Transform that handles the recursive structure of OmniOutliner files better than Beautiful Soup does, works with completed show notes that have time offsets. +without_offset.xsl - Transform that handles the recursive structure of OmniOutliner files better than Beautiful Soup does, works with segment notes that do not have time offsets. outline.bash - Drives the XSLT operation and subsequent scripting tasks that cannot be handled in XSL. diff --git a/outline.bash b/outline.bash index a261cc7..aa20cdb 100755 --- a/outline.bash +++ b/outline.bash @@ -28,12 +28,30 @@ # ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS # SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
-# TODO copy .gz file based on arg -# TODO gunzip original file -xalan -xsl outline.xsl -in contents.xml -text -out contents.txt + +src=$1/contents.xml +shift +target=$1 +# copy .gz file based on arg +cp "$src" ./contents.xml.gz + +# gunzip original file +gunzip contents.xml.gz +# makes sure the ns is only on one line, also +# makes the interim file more readable for debugging +tidy -config tidyrc -xml -m contents.xml +# xalan doesn't handle ns well, strip them +sed -e "s/ xmlns=\".*\"//g" -i contents.xml +# TODO use grep to figure out which xsl to use +xalan -xsl without_offset.xsl -in contents.xml -text -out contents.txt +# expand the indent counts to proper leading white space sed -e "s/^2/ /" -i contents.txt sed -e "s/^3/ /" -i contents.txt sed -e "s/^4/ /" -i contents.txt sed -e "s/^5/ /" -i contents.txt sed -e "s/^6/ /" -i contents.txt -less contents.txt + +# snug the result where requested +mv contents.txt "$target" +# clean up the temporary files +rm contents.xml diff --git a/outline.xsl b/with_offset.xsl similarity index 93% rename from outline.xsl rename to with_offset.xsl index 8b35413..4ca6a10 100644 --- a/outline.xsl +++ b/with_offset.xsl @@ -6,8 +6,6 @@ -