------------------------------------------------------------------------------
-- STRINGS (package body)                                                   --
--                                                                          --
-- Part of TextTools                                                        --
-- Designed and Programmed by Ken O. Burtch                                 --
--                                                                          --
------------------------------------------------------------------------------
--                                                                          --
--                 Copyright (C) 1999-2007 Ken O. Burtch                    --
--                                                                          --
-- This is free software;  you can  redistribute it  and/or modify it under --
-- terms of the  GNU General Public License as published  by the Free Soft- --
-- ware  Foundation;  either version 2,  or (at your option) any later ver- --
-- sion.  This is distributed in the hope that it will be useful, but WITH- --
-- OUT ANY WARRANTY;  without even the  implied warranty of MERCHANTABILITY --
-- or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License --
-- for  more details.  You should have  received  a copy of the GNU General --
-- Public License  distributed with this;  see file COPYING.  If not, write --
-- to  the Free Software Foundation,  59 Temple Place - Suite 330,  Boston, --
-- MA 02111-1307, USA.                                                      --
--                                                                          --
-- As a special exception,  if other files  instantiate  generics from this --
-- unit, or you link  this unit with other files  to produce an executable, --
-- this  unit  does not  by itself cause  the resulting  executable  to  be --
-- covered  by the  GNU  General  Public  License.  This exception does not --
-- however invalidate  any other reasons why  the executable file  might be --
-- covered by the  GNU Public License.                                      --
--                                                                          --
-- This is maintained at http://www.pegasoft.ca/tt.html                     --
--                                                                          --
------------------------------------------------------------------------------
--with text_io; use text_io; -- debugging only

pragma optimize( time ); -- ignored by GNAT 2.0
--pragma Normalize_Scalars;
pragma suppress( index_check );
pragma suppress( range_check );

with ada.strings.unbounded;
use  ada.strings.unbounded;

package body strings is

dips : constant string := "upanlyscolableutalisifensusteasauayeeieoeseyiaotoouuichetirontrshaithoaghurngeregundewhbackamedorvarine a d f o n r s  e_r_s_e.";

procedure FixSpacing( s : in out str255 ) is
-- remove leading and trailing spaces, as well as any double-spaces inside
   i  : integer;
begin
  while length(s) > 0 loop
    exit when Element( s, 1 ) /= ' ';
    Delete( s, 1, 1 );
  end loop;
  while length(s) > 0 loop
    exit when Element( s, length(s) ) /= ' ';
    Delete( s, length(s), length(s) );
  end loop;
  i := 1;
  while i < length(s) loop
    if Element( s, i ) = ' ' and then Element( s, i+1 ) = ' ' then
       Delete( s, i, i );
       i := i - 1;
    end if;
    i := i + 1;
  end loop;
end FixSpacing;

function HashOf( hashstr : str255 ) return long_integer is
-- a quick and easy ASCII string hash function
  weight : integer := 1; -- current weight being applied
  weights: array(1..26) of long_integer :=
           (97, 89, 83, 79, 73, 2, 3, 5, 7, 71, 67, 61, 59, 53,
            49, 47, 43, 41, 37, 31, 29, 23, 19, 17, 13, 11);
            -- a list of primes under 100
  total  : long_integer := 0; -- running total for hash value
begin
  for i in 1..length(hashstr) loop
      total := total + (weights(weight)*( character'pos(
               Element( hashstr, i ) ) - 32));
      if weight = weights'last then
         weight := 1;
      else
         weight := weight + 1;
      end if;
  end loop;
  return total;
end HashOf;

function PhoneticsOf( s : str255 ) return str255 is
-- reduce string to ENGLISH phonetics
-- equivalences from Talking Tools pg.12 (and from guessing)
  pos  : natural := 1;                  -- position in s
  ppos : natural := 1;                  -- position in PhoneticString
  PhoneticString : str255;              -- the resulting phonetics
  ch : character;                       -- current character in s
  AllowDuplicate : boolean := false;    -- TRUE to discard same adjacents
 
  function NextChar return character is
    -- get the next character (if none, return a space)
    ch : character;
  begin
    if pos < length(s) then
       ch := Element( s, pos+1 );
       if ch >= 'A' and ch <= 'Z' then
          ch := character'val( character'pos(ch) + 32 );
       end if;
       return ch;
    else
       return ' ';
    end if;
  end NextChar;

  procedure Add( c : character ) is
  -- add a phoeme to the Phonetic String, discarding adjacent duplicates
  -- if it's OK.  Some very similar sounds are grouped together (th & d)
  begin
    if ppos <= Strings255.Max_Length then
       if ppos = 1 or AllowDuplicate then
          PhoneticString := Append( PhoneticString, c );
          ppos := ppos + 1;
          AllowDuplicate := false;
       else
          if Element( PhoneticString, ppos-1 ) /= c then
             PhoneticString := Append( PhoneticString, c );
             ppos := ppos + 1;
          end if;
       end if;
    end if;
  end Add;

  procedure SkipChar is
  -- macro to advance to next position in s
  begin
    pos := pos + 1;
  end SkipChar;
  pragma Inline( SkipChar );

begin
  PhoneticString := NullStr255;
  while pos <= Strings255.Max_Length and ppos <= 80 loop
      ch := Element( s, pos );
      if ch >= 'A' and ch <= 'Z' then
         ch := character'val( character'pos(ch) + 32 );
      end if;
      case ch is
      when 'a' =>
        case NextChar is
        when 'a'|'e'|'i'|'y' =>                        -- aa, ae, ai, ay
           Add( 'A' );
           SkipChar;
        when 'r' =>                                    -- ar
           Add( 'R' );
           SkipChar;
        when 'u' =>                                    -- au
           Add( 'U' );
           SkipChar;
        when others => 
           Add( 'A' );                                 -- a
        end case;
      when 'b' =>                                      -- b
        Add( 'B' );
      when 'd' =>                                      -- d
        Add( 'D' );
      when 't' =>
        if NextChar = 'h' then                         -- th (H)
           Add( 'H' );
           SkipChar;
        else
           Add( 'D' );                                 -- t (=d)
        end if;
      when 'p' =>
        if NextChar = 'h' then                         -- ph (F)
           Add( 'F' );
           SkipChar;
        else
           Add( 'P' );                                 -- p
        end if;
      when 'c' =>                                      -- c*
         if NextChar = 'h' then                        -- ch (Y)
            Add( 'Y' );
            SkipChar;
         else
            Add( 'C' );
         end if;
      when 'e' => 
        case NextChar is
        when 'a' => Add( 'E' ); SkipChar;              -- ea
        when 'i' => Add( 'I' ); SkipChar;              -- ei
        when 'e' => Add( 'E' ); SkipChar;              -- ee
        when 'r' => Add( 'R' ); SkipChar;              -- er
        when 'u' => Add( 'U' ); SkipChar;              -- eu
        when 'y' => Add( 'A' ); SkipChar;              -- ey
        when ' '|'?'|'''|':'|';'|'.'|',' => SkipChar; -- e (silent)
        when others =>                                 -- e
             Add( 'E' );
        end case;
      when 'f' =>                                      -- f
        Add( 'F' );
      when 'g' =>                                      -- gh
        if NextChar = 'h' then
           SkipChar;
        else
           Add( 'G' );                                 -- g*
        end if;
      when 'h' =>                                      -- h
        null;
      when 'i' =>                                      -- i
        if NextChar = 'e' then                         -- ie
           Add( 'E' );
           SkipChar;
        elsif NextChar = 'r' then                      -- ir
           Add( 'R' );
           SkipChar;
        elsif NextChar = 'o' then                      -- ion
           pos := pos + 1;
           if NextChar = 'n' then
              Add( 'U' );
              Add( 'N' );
              SkipChar;
           else
              pos := pos - 1; -- treat normally
              Add( 'I' );
           end if;
        else
           Add( 'I' );
        end if;
      when 'j' =>                                      -- j
        Add( 'J' );
      when 'k'|'q' =>                                  -- k
        Add('K');
        if NextChar = 'u' then                         -- qu (KW)
           Add( 'W' );
           SkipChar;
        end if;
      when 'l'|'r' =>                                  -- l, r
        Add( 'R' );
      when 'm' =>                                      -- m
        Add( 'N' );
      when 'n' =>
        if NextChar = 'g' then
           SkipChar;                                   -- ng (=n)
        end if;
        Add( 'N' );                                    -- n
      when 'o' =>
        case NextChar is
        when 'a' =>                                    -- oa
             Add( 'O' );
             SkipChar;
        when 'o' =>                                    -- oo
             Add( 'U' ); 
             SkipChar;
        when 'r' =>                                    -- or
             Add( 'R' );
             SkipChar;
        when 'u' =>                                    -- ou
             Add( 'U' );
             SkipChar;
        when others =>                                 -- o
             Add( 'O' );
        end case;
      when 's' =>                                      -- sh (H)
        if NextChar = 'h' then
           Add( 'H' );
           SkipChar;
        else
           Add( 'S' );                                 -- s
        end if;
      when 'u' =>
        if NextChar = 'y' then                         -- uy
           Add( 'I' );
           SkipChar;
        elsif NextChar = 'r' then                      -- ur
           Add( 'R' );
           SkipChar;
        else
           Add ( 'U' );                                -- u
        end if;
      when 'v' =>                                      -- v
        Add( 'V' );
      when 'w' =>                                      -- w
        Add( 'W' );
      when 'x'|'z' =>                                  -- x, z
        Add( 'Z' );
      when 'y' =>                                      -- y
        Add( 'I' );
      when others =>
        AllowDuplicate := true;  -- allow two together if sep by sp, ', etc
        if ch >= '0' and ch <= '9' then                -- 0...9
           Add( ch );
           AllowDuplicate := true;
        end if;
      end case;
      pos := pos + 1;
  end loop;
  return PhoneticString;
end PhoneticsOf;

function TypoOf( BadString, GoodString : Str255 ) return boolean is
-- 80% of all typos are single insertions, deletions, exchanges, or subs.
  TempStr : Str255;
  BadLen  : integer;
  GoodLen : integer;
  IsTypo  : boolean;
  TempChar : character;
begin
  IsTypo := false;
  BadLen := length( BadString );
  GoodLen := length( GoodString );

  if BadString = GoodString then -- identical?
     return false;
  elsif BadLen < 4 or GoodLen < 4 then -- too short to test reliably?
     return false;
  end if;

  -- Single Insertion
  if BadLen = GoodLen+1 then
     for i in 1..BadLen loop
         if Delete( BadString, i, i ) = GoodString then
            IsTypo := true;
         end if;
     end loop;
  end if;

  -- Single Deletion
  if BadLen = GoodLen-1 then
     for i in 1..GoodLen loop
         if BadString = Delete( GoodString, i, i ) then
            IsTypo := true;
         end if;
     end loop;
  end if;

  -- Single Exchange
  if BadLen = GoodLen and not IsTypo then
     TempStr := BadString;
     for i in 1..BadLen-1 loop
         TempChar := Element( TempStr, i );
         Replace_Element( TempStr, i, Element( TempStr, i+1 ) );
         Replace_Element( TempStr, i+1, TempChar );
         if TempStr = GoodString then
            IsTypo := true;
         end if;
         Replace_Element( TempStr, i+1, Element( TempStr, i ) );
         Replace_Element( TempStr, i, TempChar );
    end loop;
  end if;

  -- Single Substitution
  if BadLen = GoodLen and not IsTypo then
     for i in 1..BadLen loop
         if Delete( BadString, i, i ) = Delete( GoodString, i, i ) then
            IsTypo := true;
         end if;
     end loop;
  end if;

  return IsTypo;

end TypoOf;

procedure Tokenize( s : str255; words : in out str255list.list ;
   ch : in out character ) is
-- encode a word as a character > 127
  Index : Str255List.AListIndex := Str255List.AListIndex'last;
begin
  Str255List.Find( Words, s, 1, Index );
  if Index = 0 or Index > 128 then
     ch := character'val( Index ); --' ';
  else
     ch := character'val( Index + 127 );
  end if;
end Tokenize;

procedure Untokenize( ch : character ; words : in out Str255List.List ;
  s : in out Str255 ) is
  Index : Str255List.AListIndex := Str255List.AListIndex'last;
begin
  s := NullStr255;
  if character'pos( ch ) > 127 then
     Index := Str255List.AListIndex( character'pos( ch ) - 127 );
     Str255List.Find( Words, Index, s );
  end if;
end Untokenize;

function  ToUpper( s : str255 ) return str255 is
  ch : character;
  newstr : str255;
begin
  newstr := s;
  for i in 1..length( s ) loop
      ch := Element( s, i );
      if ch >= 'a' and ch <= 'z' then
         ch := character'val ( character'pos( ch ) - 32 );
         Replace_Element( newstr, i, ch );
      end if;
  end loop;
  return newstr;
end ToUpper;

function ToLower( s : str255 ) return str255 is
  ch : character;
  newstr : str255;
begin
  newstr := s;
  for i in 1..length( s ) loop
      ch := Element( s, i );
      if ch >= 'A' and ch <= 'Z' then
         ch := character'val( character'pos( ch ) + 32 );
         Replace_Element( newstr, i, ch );
      end if;
  end loop;
  return newstr;
end ToLower;

function FGREP( s : str255; text : str255; filter_out : boolean := false;
-- implementation of UNIX fgrep for a single line of text
-- true if fgrep matches
  case_insensitive : boolean := false ) return boolean is
  Source, Target : str255;
begin
  if case_insensitive then
     Target := ToUpper( s );
     Source := ToUpper( text );
  else
     Target := s;
     Source := text;
  end if;
  return (Index( Source, ToString( Target ) ) > 0) xor filter_out;
end FGREP;

function FGREP( s : str255; text : str255; filter_out : boolean := false;
-- implementation of UNIX fgrep for a single line of text
-- returns the line if grep matches
  case_insensitive : boolean := false ) return str255 is
  Source, Target : str255;
begin
  if case_insensitive then
     Target := ToUpper( s );
     Source := ToUpper( text );
  else
     Target := s;
     Source := text;
  end if;
  if (Index( Source, ToString( Target ) ) > 0) xor filter_out then
     return text;
  else
     return NullStr255;
  end if;
end FGREP;

procedure FGREP( s : str255; text : in out Str255List.List;
  result : out boolean; filter_out : boolean := false;
-- implementation of UNIX fgrep for a list of strings
-- result is true if there were any matches
  case_insensitive : boolean := false ) is
  Results : Str255List.List;
  NextLine : str255;
  Target : str255;
  Source : str255;
  TheResult : boolean := true;
begin
  if case_insensitive then
     Target := ToUpper( s );
  else
     Target := s;
  end if;
  while Str255List.Length( text ) > 0 loop
    Str255List.Pull( text, NextLine );
    if case_insensitive then
       Source := ToUpper( NextLine );
    else
       Source := NextLine;
    end if;
    if Index( Source, ToString( Target ) ) > 0 then -- should rewrite to
       if not filter_out then                         -- get rid of ToString
          TheResult := true;
          exit;
       end if;
    else
       if filter_out then
          TheResult := true;
          exit;
       end if;
    end if;
 end loop;
 Result := TheResult;
end FGREP;
  
procedure FGREP( s : str255; text : in out Str255List.List;
-- implementation of UNIX fgrep for a list of strings
-- filters in/out matching strings
  filter_out : boolean := false; case_insensitive : boolean := false ) is
  Results : Str255List.List;
  NextLine : str255;
  Target : str255;
  Source : str255;
begin
  if case_insensitive then
     Target := ToUpper( s );
  else
     Target := s;
  end if;
  while Str255List.Length( text ) > 0 loop
    Str255List.Pull( text, NextLine );
    if case_insensitive then
       Source := ToUpper( NextLine );
    else
       Source := NextLine;
    end if;
    if Index( Source, ToString( Target ) ) > 0 then -- should rewrite to
       if not filter_out then                         -- get rid of ToString
          Str255List.Queue( Results, NextLine );
       end if;
    else
       if filter_out then
          Str255List.Queue( Results, NextLine );
       end if;
    end if;
 end loop;
 Str255List.Move( Results, text );
end FGREP;
 
procedure SED( sedexpr : str255; text : in out Str255List.List ) is
begin
  Error( TT_NotYetWritten );
end SED;

procedure AWK( awkexpr, text : in out Str255List.List ) is
begin
  Error( TT_NotYetWritten );
end AWK;

---> ASCII Encode/Decode

separator : constant character := character'val(1);

procedure Encode( estr : in out EncodedString; i : integer ) is
begin
  estr := Append( estr, integer'image( i ) );
  estr := Append( estr, separator );
end Encode;
  
procedure Encode( estr : in out EncodedString; r : ARect ) is
begin
  Encode( estr, r.left );
  Encode( estr, r.top );
  Encode( estr, r.right );
  Encode( estr, r.bottom );
end Encode;

procedure Encode( estr : in out EncodedString; l : long_integer ) is
begin
  estr := Append( estr, long_integer'image( l ) );
  estr := Append( estr, separator );
end Encode;

procedure Encode( estr : in out EncodedString; s : str255 ) is
begin
  estr := Append( estr, Strings255.To_String( s ) );
  estr := Append( estr, separator );
end Encode;

procedure Encode( estr : in out EncodedString; c : character ) is
begin
  estr := Append( estr, c );
  -- estr := Strings255.Append( estr, separator );
end Encode;

procedure Encode( estr : in out EncodedString; b : boolean ) is
begin
  if b then
     estr := Append( estr, 'T' );
  else
     estr := Append( estr, 'F' );
  end if;
end Encode;

procedure Decode( estr : in out EncodedString; i : in out integer ) is
  idx : integer := 1;
begin
  while Element( estr, idx ) /= separator loop
    idx := idx + 1;
  end loop;
  i := integer'value( ToString( Head( estr, idx-1 ) ) );
  Delete( estr, 1, idx );
end Decode;
  
procedure Decode( estr : in out EncodedString; r : in out ARect ) is
begin
  Decode( estr, r.left );
  Decode( estr, r.top );
  Decode( estr, r.right );
  Decode( estr, r.bottom );
end Decode;

procedure Decode( estr : in out EncodedString; l : in out long_integer ) is
  idx : integer := 2;
begin
  while Element( estr, idx ) /= separator loop
    idx := idx + 1;
  end loop;
  l := long_integer'value( ToString( Head( estr, idx-1 ) ) );
  Delete( estr, 1, idx );
end Decode;

procedure Decode( estr : in out EncodedString; s : in out str255 ) is
  pos : integer := 1;
begin
  s := NullStr255;
  while Strings255.Element( estr, pos ) /= separator loop
    pos := pos + 1;
  end loop;
  s := Strings255.Head( estr, pos - 1 );
  Delete( estr, 1, pos );
end Decode;

procedure Decode( estr : in out EncodedString; c : in out character ) is
begin
  c := Element( estr, 1 );
  Delete( estr, 1, 1 );
end Decode;

procedure Decode( estr : in out EncodedString; b : in out boolean ) is
  c : character := ASCII.NUL;
begin
  Decode( estr, c );
  b := (c = 'T');
end Decode;

--  BASIC PACK
--
-- Compress string s using dipthong compression resulting in a new string of
-- 50% to 100% the size of the original.  s must contain only lower ASCII
-- characters since the upper ASCII characters are used for the compression.
------------------------------------------------------------------------------

function basic_pack( s : string ) return packed_string is
   dip : string(1..2);
   i : positive;
   dip_pos : natural;
   result : unbounded_string;
begin
   i := s'first;
   result := null_unbounded_string;
   loop
     exit when i > s'last;
     dip_pos := 0;
     if i /= s'last then
        dip := s(i..i+1);
        for j in dips'first..dips'last-1 loop
            if dip = dips(j..j+1) then
               dip_pos := j;
               exit;
            end if;
        end loop;
     end if;
     if dip_pos > 0 then
        result := result & character'val( dip_pos + 127 );
        i := i + 2;
     else
        result := result & s(i);
        i := i + 1;
     end if;
   end loop;
   return packed_string( to_string( result ) );
end basic_pack;

--  UNPACK
--
-- Decompress string s that was compressed using basic_pack.
------------------------------------------------------------------------------

function unpack( s : packed_string ) return string is
   dip_pos : positive;
   newstr : unbounded_string;
begin
   for i in s'range loop
       if character'pos( s(i) ) >= 128 then
          dip_pos := character'pos( s(i) ) - 127;
          newstr := newstr & dips( dip_pos..dip_pos+1 );
       else
          newstr := newstr & s(i);
       end if;
   end loop;
   return to_string( newstr );
end unpack;

end strings;

