3

この Delphi 関数を SSE2 アセンブリ (32 ビット) で実装する方法についてヒントをください。その他の最適化も歓迎します。どのような命令が使えるかを教えていただければ、さらに調べるための出発点になります。

実際:

const Precision = 10000;

// This function adds all Pixels into one. The pixels are weighted before adding. 
// A weight can range from 0 to "Precision". "Size" is typically 10 to 50.

{ Combines Size four-byte pixels into one packed 32-bit value.

  Pixels  - pointer to Size consecutive 4-byte pixels; the bytes of each pixel
            are accumulated, in memory order, into B, G, R and A.
  Weights - pointer to Size weights, one per pixel; the same weight is applied
            to all four channels of that pixel.
  Size    - number of pixel/weight pairs to combine. Size = 0 reads nothing
            and returns 0.

  Returns the weighted channel sums, each divided by Precision (truncating),
  packed as A shl 24 or R shl 16 or G shl 8 or B.

  NOTE: the packing only yields clean channels when every quotient fits in a
  byte, i.e. when the weights sum to at most Precision. Larger sums spill into
  the neighbouring channel bits; this is left as it was for compatibility. }
function TFilter.Combine(Pixels: PByte; Weights: PCardinal; const Size: Cardinal): Cardinal;
var
  i, R, G, B, A, W: Cardinal;
begin
  // Start from zero and iterate 1..Size instead of peeling off the first
  // pixel. The peeled version read Pixels^/Weights^ even when Size = 0 and
  // computed the loop bound as Size - 1, which underflows for an unsigned 0.
  B := 0;
  G := 0;
  R := 0;
  A := 0;

  for i := 1 to Size do
  begin
    W := Weights^; // one weight per pixel, shared by all four channels
    Inc(B, Pixels^ * W); Inc(Pixels);
    Inc(G, Pixels^ * W); Inc(Pixels);
    Inc(R, Pixels^ * W); Inc(Pixels);
    Inc(A, Pixels^ * W); Inc(Pixels);
    Inc(Weights); // advance to the next pixel's weight
  end;

  // Scale the fixed-point sums back down (integer division truncates).
  B := B div Precision;
  G := G div Precision;
  R := R div Precision;
  A := A div Precision;

  Result := (A shl 24) or (R shl 16) or (G shl 8) or B;
end;

期待される:

// Placeholder for an assembly version of Combine with the same signature as
// the Pascal implementation. The asm body currently contains no instructions,
// so as written this routine does not compute a result.
function TFilter.Combine(Pixels: PByte; Weights: PCardinal; const Size: Cardinal): Cardinal;
asm
  // Insert fast SSE2-Code here ;-)
end;
4

回答 1 件