#!/usr/bin/env perl use File::Basename; use SoundLabel; #---------------------------- #For example, ./make_text_grids sound.wav transcript.txt sound.TextGrid #---------------------------- # Authors: Keith Johnson (keithjohnson@berkeley.edu) # # Copyright (c) 2014, The Regents of the University of California # All rights reserved. # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are met: # # * Redistributions of source code must retain the above copyright notice, this # list of conditions and the following disclaimer. # # * Redistributions in binary form must reproduce the above copyright notice, # this list of conditions and the following disclaimer in the documentation # and/or other materials provided with the distribution. # # * Neither the name of the University of California nor the names of its # contributors may be used to endorse or promote products derived from # this software without specific prior written permission. # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
# make_text_grids -- align a transcript to a sound file with pyalign,
# honouring skip regions marked in the transcript.
#
# Usage: ./make_text_grids sound.wav transcript.txt sound.TextGrid
#
# A transcript line of the form "#x,y" marks the audio from time x to time y
# as a region to skip.  The transcript text collected before each skip marker
# is aligned on its own into <target>_N.TextGrid, and the pieces are then
# joined by concat_pyalign_textgrids.  A transcript without any skip marker is
# handed to pyalign in one go.
use strict;
use warnings;
use File::Basename qw(fileparse);
use File::Path qw(remove_tree);
use Scalar::Util qw(looks_like_number);

my $USAGE     = "example: ./make_text_grids sound.wav transcript.txt sound.TextGrid\n";
my $BIGNUMBER = 1000000000;    # sentinel: $end still holds this if no skip marker was seen
my $TEMP_TEXT = 'temp.txt';    # scratch transcript for the segment being collected

# Run an external command without going through the shell, so file names
# containing spaces or shell metacharacters reach the tool intact.  Standard
# output is read and discarded, as the original backtick calls did.
# Returns 1 on success; warns and returns 0 on failure.
sub run_tool {
    my @cmd = @_;
    my $pipe;
    unless (open($pipe, '-|', @cmd)) {
        warn "cannot run $cmd[0]: $!\n";
        return 0;
    }
    while (defined(my $discard = <$pipe>)) { }
    unless (close $pipe) {
        warn $!
            ? "error closing pipe from $cmd[0]: $!\n"
            : "$cmd[0] exited with status " . ($? >> 8) . "\n";
        return 0;
    }
    return 1;
}

# Split the text after '#' into (skip_from, skip_to), trimming surrounding
# whitespace (including a stray CR from CRLF transcripts).  Returns an empty
# list when either value is missing or not numeric.
sub parse_skip_times {
    my ($spec) = @_;
    my ($from, $to) = split /,/, $spec;
    for my $time ($from, $to) {    # aliases $from/$to, so the trim sticks
        return unless defined $time;
        $time =~ s/^\s+|\s+$//g;
        return unless looks_like_number($time);
    }
    return ($from, $to);
}

# (Re)create the scratch transcript and return a write handle on it.
sub open_temp_text {
    open(my $fh, '>', $TEMP_TEXT) or die "open $TEMP_TEXT failed $!\n";
    return $fh;
}

if (@ARGV && $ARGV[0] eq '-h') {
    print $USAGE;
    exit;
}
if (@ARGV < 3) {
    print STDERR $USAGE;
    exit 1;
}

my ($wav, $TL, $tg) = @ARGV;    # wav file, transcript, TextGrid target

die "$TL: $!\n"  unless -e $TL;
die "$wav: $!\n" unless -e $wav;

my $count = 1;                  # number of the next segment TextGrid
my $start = 0;                  # start time of the segment being collected
my $end   = $BIGNUMBER;         # start time of the most recent skip region
my @segments;                   # segment TextGrids, in creation order

my ($file, $dir) = fileparse($tg, qr/\.TextGrid/);
my $tgrid_name    = $dir . $file;
my $final_outname = $tgrid_name . '.TextGrid';

# Keep a copy of any TextGrid that is about to be overwritten.
if (-e $final_outname) {
    my $old_tgrid = $final_outname . '.old';
    rename($final_outname, $old_tgrid)
        or warn "could not move $final_outname to $old_tgrid: $!\n";
}

open(my $in, '<', $TL) or die "open $TL failed: $!\n";
my $out = open_temp_text();

while (my $line = <$in>) {
    if ($line =~ /^#(.+)/) {    # line is a skip marker: "#x,y"
        my ($skip_from, $skip_to) = parse_skip_times($1);
        unless (defined $skip_from) {
            warn "$TL line $.: ignoring malformed skip marker: $line";
            next;
        }
        $end = $skip_from;
        print "\nSkipping from time $end to time $skip_to.\n";
        if ($end > $start) {
            # Align the text gathered so far against the audio that
            # precedes this skip region, then start a fresh scratch file.
            close($out) or die "close $TEMP_TEXT failed: $!\n";
            my $segment = $tgrid_name . "_" . $count . ".TextGrid";
            run_tool('pyalign', '-s', $start, '-e', $end, $wav, $TEMP_TEXT, $segment);
            remove_tree('tmp');    # clears the 'tmp' directory left in the cwd
            push @segments, $segment;
            $count++;
            $out = open_temp_text();
        }
        $start = $skip_to;
    }
    else {
        # print, not printf: transcript text must not be used as a format
        print {$out} $line;
    }
}
close($in);
close($out) or die "close $TEMP_TEXT failed: $!\n";

if ($start > $end) {
    # The last skip region ended before the end of the audio: align the
    # remaining text from there to the end of the file.
    my $segment = $tgrid_name . "_" . $count . ".TextGrid";
    run_tool('pyalign', '-s', $start, $wav, $TEMP_TEXT, $segment);
    remove_tree('tmp');
    push @segments, $segment;
}

if ($end == $BIGNUMBER) {
    # bare transcript: no skip regions
    run_tool('pyalign', $wav, $TL, $final_outname);
    remove_tree('tmp');
}
else {
    # Transcript had skip regions: join the segment TextGrids.  They are
    # passed explicitly in creation order; a shell glob would sort _10
    # ahead of _2 and could match unrelated files sharing the prefix.
    my @existing = grep { -e $_ } @segments;
    if (@existing) {
        my $concat_tmp = $tgrid_name . '.TextGrid.tmp';
        run_tool('concat_pyalign_textgrids', '--ofile', $concat_tmp, @existing);
        if (-e $concat_tmp) {
            unlink @existing;
            rename($concat_tmp, $final_outname)
                or warn "could not move $concat_tmp to $final_outname: $!\n";
        }
        else {
            warn "concat_pyalign_textgrids produced no output; keeping segment files\n";
        }
    }
    else {
        warn "no segment TextGrids were produced; nothing to concatenate\n";
    }
}

unlink $TEMP_TEXT;