unit UCS4Helper;

{$mode objfpc}{$H+}{$modeswitch typehelpers}


//  Unit UCS4Helper by Jorg3000, Version 1.0 from 2023-12-24


interface


type
     { TUCS4Helper }

     TUCS4Helper = type helper for UCS4String

       function  GetLength: SizeInt;
       procedure SetLength(NewLen: SizeInt);

       procedure SetStr(const s: String);        overload;
       procedure SetStr(const ws: WideString);     inline;
       procedure SetStr(const ws: UnicodeString);  inline;

       procedure Add(const s:  String);      overload;
       procedure Add(const us: UCS4String);

       function  CharAsUtf8(i: SizeInt): String;
       procedure SetChar(i: SizeInt; const s: String);

       function  toUtf8:  String;
       function  toUtf16: UnicodeString;  inline;

       function  Copy(Index, Len: SizeInt): UCS4String;
       function  CopyAsUtf8(Index, Len: SizeInt): String;

       procedure Delete(Index, Len: SizeInt);

       procedure Insert(const Source: String;     Index: SizeInt);  overload;
       procedure Insert(const Source: UCS4String; Index: SizeInt);

       function  Pos(const s:  String;     Offset: SizeInt=1): SizeInt;  overload;
       function  Pos(const us: UCS4String; Offset: SizeInt=1): SizeInt;

       property  Length: SizeInt  read GetLength  write SetLength;

       property  Chars[i: SizeInt]: String  read CharAsUtf8  write SetChar;  // "default" funzt nicht weil UCS4String = Array
     end;


implementation


{ TUCS4Helper }


function TUCS4Helper.GetLength: SizeInt;
begin
  Result:=System.Length(self);
  if Result>0 then dec(Result);
end;


procedure TUCS4Helper.SetLength(NewLen: SizeInt);
var OldLen: SizeInt;
begin
  if NewLen<=0 then begin self:=nil; Exit; end;

  OldLen:=self.GetLength;
  if OldLen=NewLen then Exit;

  System.SetLength(self,NewLen+1);
  self[NewLen]:=0;
end;


procedure TUCS4Helper.SetStr(const s: String);
begin
  if s='' then self:=nil
          else self:=WideStringToUCS4String(WideString(s));
end;


procedure TUCS4Helper.SetStr(const ws: WideString);
begin
  self:=WideStringToUCS4String(ws);
end;


procedure TUCS4Helper.SetStr(const ws: UnicodeString);
begin
  self:=UnicodeStringToUCS4String(ws);
end;


procedure TUCS4Helper.Add(const s: String);
begin
  self.Add( WideStringToUCS4String(WideString(s)) );
end;


procedure TUCS4Helper.Add(const us: UCS4String);
var Len1, Len2: SizeInt;
begin
  Len2:=System.Length(us)-1;
  if Len2<=0 then Exit;

  Len1:=System.Length(self)-1;
  if Len1<=0 then begin self:=us; Exit; end;

  System.SetLength(self,Len1+Len2+1);
  Move( us[0], self[Len1], (Len2+1)*SizeOf(UCS4Char) );
end;


procedure decodeUCS4Char(c4: UCS4Char; out w1, w2: WideChar);  // ähnlich wie in ustrings.inc:UCS4Decode()
begin
  w2:=#0;  // default for 1 UTF16 codepoint only
  if c4<=$ffff then begin w1:=WideChar(Lo(c4)); Exit; end;
  if DWord(c4)>$10ffff then begin w1:='?'; Exit; end;  // invalid codepoint
  w1:=WideChar(c4 shr 10 + $d7c0);
  w2:=WideChar(c4 and $3ff + $dc00);
end;


function TUCS4Helper.CharAsUtf8(i: SizeInt): String;
var c4: UCS4Char;
    w1, w2: WideChar;
    ws: WideString;
begin
  c4:=self[i-1];
  if c4<128 then begin System.SetLength(Result,1); Result[1]:=Char(c4); Exit; end;  // shortcut for ASCII

  decodeUCS4Char(c4,{out}w1,w2);
  if w2=#0 then begin System.SetLength(ws,1); ws[1]:=w1; end
           else begin System.SetLength(ws,2); ws[1]:=w1; ws[2]:=w2; end;
  Result:=UTF8Encode(ws);
end;


procedure TUCS4Helper.SetChar(i: SizeInt; const s: String);
var us: UCS4String;
    Len2: SizeInt;
begin
  us:=WideStringToUCS4String(WideString(s));
  Len2:=System.Length(us)-1;
  if Len2<=0 then Exit;
  self[i-1]:=us[0];
end;


function TUCS4Helper.toUtf8: String;
begin
  Result:=UTF8Encode(UCS4StringToWideString(self));
end;


function TUCS4Helper.toUtf16: UnicodeString;   inline;
begin
  Result:=UCS4StringToUnicodeString(self);
end;


function TUCS4Helper.Copy(Index, Len: SizeInt): UCS4String;
var Len1, Len2: SizeInt;
begin
  if (Index<=0) or (Len<=0) then Exit;

  Len1:=self.GetLength;
  Len2:=Len1+1-Index;  // maximal mögliche Länge ab Index
  if Len2<=0 then Exit;
  if Len<Len2 then Len2:=Len;

  System.SetLength(Result,Len2+1);
  Move( self[Index-1], Result[0], Len2*SizeOf(UCS4Char) );
  Result[Len2]:=0;
end;


function TUCS4Helper.CopyAsUtf8(Index, Len: SizeInt): String;
begin
  Result:=UTF8Encode(UCS4StringToWideString( self.Copy(Index,Len) ));
end;


procedure TUCS4Helper.Delete(Index, Len: SizeInt);
var Len1, Len2: SizeInt;
begin
  if (Index<=0) or (Len<=0) then Exit;

  Len1:=self.GetLength;
  if (Len1<=0) or (Index>Len1) then Exit;

  Len2:=Len1+1-Index;  // maximal mögliche Länge ab Index
  if Len2<=0 then Exit;
  if Len<Len2 then Len2:=Len;

  System.Delete(self,Index-1,Len2);
  // Move( self[Index+Len2-1], self[Index-1], (Len2+1)*SizeOf(UCS4Char) );
  // System.SetLength(self,Index+Len2);
end;


procedure TUCS4Helper.Insert(const Source: String; Index: SizeInt);
begin
  self.Insert( WideStringToUCS4String(WideString(Source)), Index);
end;


procedure TUCS4Helper.Insert(const Source: UCS4String; Index: SizeInt);
var Len2: SizeInt;
begin
  if Index<=0 then Exit;
  Len2:=Source.GetLength;
  if Len2<=0 then Exit;
  System.Insert(Source,{var}self,Index-1);
  System.Delete({var}self,Index-1+Len2,1);  // remove the terminating zero of Source
end;


function TUCS4Helper.Pos(const s: String; Offset: SizeInt=1): SizeInt;
begin
  Result:=self.Pos( WideStringToUCS4String(WideString(s)), Offset );
end;


function TUCS4Helper.Pos(const us: UCS4String; Offset: SizeInt=1): SizeInt;
var i, Len1, Len2, Len2Bytes: SizeInt;
begin
  Result:=0;
  Len1:=System.Length(self)-1;
  if Len1<=0 then Exit{0};

  Len2:=System.Length(us)-1;
  if Len2<=0 then Exit{0};

  if Offset<1 then Offset:=1;
  dec(Offset);
  if Len2>Len1-Offset then Exit{0};

  Len2Bytes:=Len2*SizeOf(UCS4Char);

  // note "dec(Offset)" see above  =>  e.g. Offset=0
  for i:=Offset to Len1-Len2 do
    if CompareByte(us[0],self[i],Len2Bytes)=0 then Exit(i+1);
end;



end.




