#!/usr/bin/perl

# (9/27/2023): Accepts an SRT captioning file (or a plain-text transcript)
# and runs it through Bison-32K.
#
# Usage: ./<this script> INFILE OUTFILE FORMAT(SRT/TXT) PROMPTNUM
#
#   INFILE    - transcript to load; must be at least 50 bytes.
#   OUTFILE   - receives the model's text. Two side files are also written:
#               OUTFILE.toapi (the JSON request) and OUTFILE.fromapi (the raw
#               JSON response).
#   FORMAT    - 'SRT' strips SRT cue numbers, timestamps and ">>" speaker
#               markers; any other value loads INFILE verbatim as text.
#   PROMPTNUM - 'SUMMARIZE' or 'SUMMARIZESUMMARIES' prepends the matching
#               prompt; any other non-empty value submits the transcript
#               as-is.
#
# Every submission is appended to SUBMIT.LOG in the current directory.
# Requires curl, gcloud and jq on the PATH.

use strict;
use warnings;

use JSON::XS;

# Replace [YOURPROJECTID] with your own Google Cloud project ID.
my $ENDPOINT = 'https://us-central1-aiplatform.googleapis.com/v1/projects/[YOURPROJECTID]/locations/us-central1/publishers/google/models/text-bison-32k:predict';

##################################################################
# VALIDATE PARAMETERS...
my ($in_file, $out_file, $format, $prompt_name) = @ARGV;

my $args_ok = 1;
for my $arg ($in_file, $out_file, $format, $prompt_name) {
    $args_ok = 0 if !defined $arg || $arg eq '';
}

# A missing input file has no size and is rejected just like a tiny one.
if ($args_ok && ((-s $in_file) || 0) < 50) {
    $args_ok = 0;
}

if (!$args_ok) {
    # The message goes to STDOUT and the exit status stays 0, as it always
    # has, so existing callers that parse the output keep working.
    print "FATAL: Invalid Parameters...\n";
    exit;
}
##################################################################

##################################################################
# LOAD TRANSCRIPT...
my $transcript = $format eq 'SRT' ? load_srt($in_file) : load_txt($in_file);
##################################################################

##################################################################
# CONSTRUCT, SUBMIT AND RECEIVE OUR RESULTS...

# For a quick test, substitute a short fixed transcript:
#$transcript = 'Welcome to tonights broadcast. Our first story is that Donald Trump announced he was running for president again. Joe Biden is traveling to China next week to discuss Ukraine.';

# add our prompt...
my %prompt_for = (
    SUMMARIZE =>
        "Divide the television news transcript below into a set of discrete stories, make them into a bulleted list with a title and short description and summary of each in English. Each story should include a Title, Description and Summary and be written in English. Do not use any information beyond what is in the transcript below. \n\nTELEVISION NEWS TRANSCRIPT:\n",
    SUMMARIZESUMMARIES =>
        "Below is a collection of television news summaries describing a day's news on a television news channel. Take all of the summaries below, combine the ones that describe the same story and divide the final summary into a set of discrete stories, make them into a bulleted list with a title and short description and summary of each in English. Each story should include a Title, Description and Summary and be written in English. Do not use any information beyond what is in the summaries below. \n\nTELEVISION NEWS SUMMARIES:\n",
);
if (exists $prompt_for{$prompt_name}) {
    $transcript = $prompt_for{$prompt_name} . $transcript;
}

# construct the submission...
my $request = {
    instances  => [ { prompt => $transcript } ],
    parameters => {
        temperature     => 0.2,
        maxOutputTokens => 8192,
        #topK           => 40,
        #topP           => 0.95,
    },
};

my $to_api_file   = "$out_file.toapi";
my $from_api_file = "$out_file.fromapi";

# ->utf8 makes encode() return UTF-8 bytes, so the handle needs no layer.
open my $req_fh, '>', $to_api_file
    or die "FATAL: cannot write $to_api_file: $!\n";
print {$req_fh} JSON::XS->new->allow_nonref(1)->utf8->encode($request);
close $req_fh
    or die "FATAL: cannot finish writing $to_api_file: $!\n";

# submit it...
# Remove any stale response so an old result is never mistaken for a new one.
unlink $from_api_file;

# The command is built once so that what is logged is exactly what is run.
# $(gcloud ...) is left for the shell to expand, which also keeps the access
# token out of SUBMIT.LOG.
my $curl_cmd = join ' ',
    'curl -s -X POST',
    '-H "Authorization: Bearer $(gcloud auth print-access-token)"',
    '-H "Content-Type: application/json"',
    shell_quote($ENDPOINT),
    '-d', shell_quote('@' . $to_api_file),
    '>', shell_quote($from_api_file);

if (open my $log_fh, '>>', 'SUBMIT.LOG') {
    print {$log_fh} scalar(localtime(time)) . ": $curl_cmd\n";
    close $log_fh;
}
else {
    # Logging is best-effort; a missing log must not block the submission.
    warn "WARNING: cannot append to SUBMIT.LOG: $!\n";
}

system($curl_cmd) == 0
    or warn "WARNING: curl submission failed (wait status $?)\n";

# and get the results...
# The jq filter is quoted so the shell cannot glob-expand "[0]".
my $jq_cmd = join ' ',
    q{jq -r '.predictions[0].content'},
    '<', shell_quote($from_api_file),
    '>', shell_quote($out_file);

system($jq_cmd) == 0
    or warn "WARNING: jq extraction failed (wait status $?)\n";
##################################################################

##################################################################
# HELPERS...

# Load an SRT caption file and return its spoken text as one line of
# single-space-separated words. Dies if the file cannot be opened.
sub load_srt {
    my ($path) = @_;

    open my $fh, '<:utf8', $path
        or die "FATAL: cannot open $path: $!\n";

    my $text = '';
    while (my $line = <$fh>) {
        # first transcode a few characters (BOM and U+FFFF become spaces)...
        $line =~ s/[\x{FEFF}\x{FFFF}]/ /g;

        # remove leading/trailing spaces (this also drops the newline)...
        $line =~ s/\s+$//;
        $line =~ s/^\s+//;

        # skip formatting lines: blanks, cue numbers and timestamp ranges...
        next if $line eq '' || $line =~ /^\d+$/;
        next if $line =~ /^[\d:,\-\> ]+$/;

        # remove speaker notations...
        $line =~ s/^>>\s*//;

        $text .= $line . ' ';
    }
    close $fh;

    # clean up extra spaces...
    $text =~ s/\s+/ /gs;

    return $text;
}

# Load a plain-text transcript verbatim, decoded as UTF-8. Dies if the file
# cannot be opened.
sub load_txt {
    my ($path) = @_;

    open my $fh, '<:utf8', $path
        or die "FATAL: cannot open $path: $!\n";
    my $text = do { local $/; <$fh> };
    close $fh;

    return defined $text ? $text : '';
}

# Return $text in a form that is safe as a single shell word. Strings made
# only of harmless characters are returned unchanged, so ordinary file names
# appear in SUBMIT.LOG exactly as typed; anything else is single-quoted.
sub shell_quote {
    my ($text) = @_;

    return $text if $text =~ m{\A[A-Za-z0-9_./\@%+=:,-]+\z};

    (my $quoted = $text) =~ s/'/'\\''/g;
    return "'$quoted'";
}
##################################################################