Răsfoiți Sursa

Remove defunct gawk scripts that generated a very old version of this table.

Doug Ilijev 8 ani în urmă
părinte
comite
363d7c36ad
1 fișier modificat, cu 0 adăugiri și 149 de ștergeri
  1. 0 149
      lib/Parser/CaseInsensitive.cpp

+ 0 - 149
lib/Parser/CaseInsensitive.cpp

@@ -56,155 +56,6 @@ namespace UnifiedRegex
             }
         };
 
-/*
-We first construct a total map from character codes to equivalence lists such that:
- - if ToUpper(c1) == ToUpper(c2) then c1 has c2 in its equivalence list
- - if c1 and c2 appear in the same equivalence list then c1 and c2 have equal equivalence lists
-
-We then compress the above map such that:
- - characters with singleton equivalence classes are elided
- - consecutive characters with consecutive equivalence lists are represented as a range and delta
- - the result is in strictly increasing range order
-
-Using gawk the above is:
-  gawk -f equiv.gawk http://www.unicode.org/Public/8.0.0/ucd/UnicodeData.txt | gawk -f table.gawk
-
-Where equiv.gawk is:
-----------------------------------------------------------------------
-BEGIN {
-  FS = ";";
-  previncode = -1;
-}
-length($1) == 4 {
-  incode = strtonum("0x" $1);
-  for (i = previncode + 1; i < incode; i++)
-    map[i] = i;
-  if ($3 == "Ll" && $15 != "")
-  {
-    map[incode] = strtonum("0x" $15);
-    # non-7-bit-ASCII cannot map to 7-bit-ASCII
-    if (incode > 127 && map[incode] <= 127)
-      map[incode] = incode;
-  }
-  else
-    map[incode] = incode;
-  previncode = incode;
-}
-END {
-  for (i = previncode + 1; i <= 0xffff; i++)
-    map[i] = i;
-
-  for (i = 0x0000; i <= 0xffff; i++)
-    ninv[i] = 0;
-
-  for (i = 0x0000; i <= 0xffff; i++)
-  {
-    if (map[i] != i)
-      ninv[map[i]]++;
-  }
-
-  maxninv = 0;
-  for (i = 0x0000; i <= 0xffff; i++)
-  {
-    if (ninv[i] > maxninv)
-      maxninv = ninv[i];
-  }
-  if (maxninv > 2)
-    print "ERROR";
-
-  for (i = 0x0000; i <= 0xffff; i++)
-    inv[i] = "";
-
-  for (i = 0x0000; i <= 0xffff; i++)
-  {
-    if (map[i] != i)
-      inv[map[i]] = sprintf("%s;0x%04x", inv[map[i]], i);
-  }
-
-  for (i = 0x0000; i <= 0xffff; i++)
-  {
-    if (map[i] != i)
-    {
-      equiv[i] = sprintf("0x%04x%s", map[i], inv[map[i]]);
-      nequiv[i] = 1 + ninv[map[i]];
-    }
-    else if (inv[i] != "")
-    {
-      equiv[i] = sprintf("0x%04x%s", i, inv[i]);
-      nequiv[i] = 1 + ninv[i];
-    }
-    else
-    {
-      equiv[i] = sprintf("0x%04x", i);
-      nequiv[i] = 1;
-    }
-  }
-
-  nentries = 0
-  for (i = 0x0000; i <= 0xffff; i++)
-  {
-    if (nequiv[i] > 1)
-    {
-      printf("0x%04x;%s\n", i, equiv[i]);
-      nentries++;
-    }
-  }
-  #printf("nentries = %d\n", nentries);
-}
-----------------------------------------------------------------------
-
-And table.gawk is:
-----------------------------------------------------------------------
-BEGIN {
-  FS = ";";
-  lastCode = -1;
-  currStart = -1;
-  for (i = 0; i < 3; i++)
-    currDeltas[i] = "";
-}
-{
-  if (NF > 4)
-    print "ERROR"
-
-  incode = strtonum($1);
-  for (i = 0; i < NF - 1; i++)
-    equivs[i] = strtonum($(i+2));
-  for (i = NF - 1; i < 3; i++)
-    equivs[i] = equivs[i - 1];
-
-  #printf("0x%04x, 0x%04x, 0x%04x, 0x%04x\n", incode, equivs[0], equivs[1], equivs[2]);
-
-  for (i = 0; i < 3; i++)
-    deltas[i] = equivs[i] - incode;
-
-  if (currStart < 0)
-  {
-    # start a new range
-    currStart = incode;
-    for (i = 0; i < 3; i++)
-      currDeltas[i] = deltas[i]
-  }
-  else if (incode == lastCode + 1 && deltas[0] == currDeltas[0] && deltas[1] == currDeltas[1] && deltas[2] == currDeltas[2])
-  {
-    # keep accumulating range
-  }
-  else
-  {
-    # dump current range and start a new one
-    printf("            0x%04x, 0x%04x, %d, %d, %d,\n", currStart, lastCode, currDeltas[0], currDeltas[1], currDeltas[2]);
-    currStart = incode;
-    for (i = 0; i < 3; i++)
-      currDeltas[i] = deltas[i]
-  }
-
-  lastCode = incode;
-}
-END {
-  printf("            0x%04x, 0x%04x, %d, %d, %d,\n", currStart, lastCode, currDeltas[0], currDeltas[1], currDeltas[2]);
-}
-----------------------------------------------------------------------
-*/
-
         // For case-folding entries, version 8.0.0 of CaseFolding.txt located at [1] was used.
         // [1] http://www.unicode.org/Public/8.0.0/ucd/CaseFolding.txt
         static constexpr Transform transforms[] =