1 files changed, 187 insertions, 0 deletions
diff --git a/platform/structuralsearch/source/com/intellij/tokenindex/TokenIndex.java b/platform/structuralsearch/source/com/intellij/tokenindex/TokenIndex.java
new file mode 100644
index 000000000000..4547092eb58f
--- /dev/null
+++ b/platform/structuralsearch/source/com/intellij/tokenindex/TokenIndex.java
@@ -0,0 +1,187 @@
+package com.intellij.tokenindex;
+
+import com.intellij.lang.Language;
+import com.intellij.openapi.fileTypes.LanguageFileType;
+import com.intellij.openapi.vfs.JarFileSystem;
+import com.intellij.openapi.vfs.VirtualFile;
+import com.intellij.structuralsearch.StructuralSearchUtil;
+import com.intellij.util.containers.HashMap;
+import com.intellij.util.indexing.*;
+import com.intellij.util.io.DataExternalizer;
+import com.intellij.util.io.KeyDescriptor;
+import org.jetbrains.annotations.NotNull;
+
+import java.io.DataInput;
+import java.io.DataOutput;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+import java.util.Map;
+
+/**
+ * @author Eugene.Kudelevsky
+ */
+public class TokenIndex extends FileBasedIndexExtension<TokenIndexKey, List<Token>> {
+  private static final int FILE_BLOCK_SIZE = 100;
+
+  public static final ID<TokenIndexKey, List<Token>> INDEX_ID = ID.create("token.index");
+
+  private static final int VERSION = 3;
+
+  private final KeyDescriptor<TokenIndexKey> myKeyDescriptor = new TokenIndexKeyDescriptor();
+
+  private static final int ANONYM_TOKEN_ID = 0;
+  private static final int TEXT_TOKEN_ID = 1;
+  private static final int MARKER_TOKEN_ID = 2;
+  private static final int INDENT_TOKEN_ID = 3;
+
+  private final DataExternalizer<List<Token>> myDataExternalizer = new DataExternalizer<List<Token>>() {
+    @Override
+    public void save(@NotNull DataOutput out, List<Token> value) throws IOException {
+      out.writeInt(value.size());
+      for (Token token : value) {
+        if (token instanceof AnonymToken) {
+          out.writeByte(ANONYM_TOKEN_ID);
+          out.writeInt(token.getStart());
+          out.writeInt(token.getEnd());
+          out.writeByte(((AnonymToken)token).getType());
+        }
+        else if (token instanceof TextToken) {
+          out.writeByte(TEXT_TOKEN_ID);
+          out.writeInt(token.getStart());
+          out.writeInt(token.getEnd());
+          out.writeInt(((TextToken)token).getHash());
+        }
+        else if (token instanceof PathMarkerToken) {
+          out.writeByte(MARKER_TOKEN_ID);
+          out.writeUTF(((PathMarkerToken)token).getPath());
+        }
+        else if (token instanceof IndentToken) {
+          out.writeByte(INDENT_TOKEN_ID);
+          out.writeInt(token.getStart());
+          out.writeInt(token.getEnd());
+        }
+        else {
+          assert false : "Unsupported token type " + token.getClass();
+        }
+      }
+    }
+
+    @Override
+    public List<Token> read(@NotNull DataInput in) throws IOException {
+      List<Token> result = new ArrayList<Token>();
+      int n = in.readInt();
+      for (int i = 0; i < n; i++) {
+        byte tokenTypeId = in.readByte();
+        switch (tokenTypeId) {
+          case ANONYM_TOKEN_ID: {
+            int start = in.readInt();
+            int end = in.readInt();
+            byte anonymTokenTypeValue = in.readByte();
+            result.add(new AnonymToken(anonymTokenTypeValue, start, end));
+            break;
+          }
+          case TEXT_TOKEN_ID: {
+            int start = in.readInt();
+            int end = in.readInt();
+            int hash = in.readInt();
+            result.add(new TextToken(hash, start, end));
+            break;
+          }
+          case MARKER_TOKEN_ID: {
+            String path = in.readUTF();
+            result.add(new PathMarkerToken(path));
+            break;
+          }
+          case INDENT_TOKEN_ID:
+            int start = in.readInt();
+            int end = in.readInt();
+            result.add(new IndentToken(start, end));
+            break;
+        }
+      }
+      return result;
+    }
+  };
+
+  @NotNull
+  @Override
+  public ID<TokenIndexKey, List<Token>> getName() {
+    return INDEX_ID;
+  }
+
+  private static int getBlockId(String filePath) {
+    int h = filePath.hashCode();
+    if (h < 0) {
+      h = -h;
+    }
+    return h % FILE_BLOCK_SIZE;
+  }
+
+  @NotNull
+  @Override
+  public DataIndexer<TokenIndexKey, List<Token>, FileContent> getIndexer() {
+    return new DataIndexer<TokenIndexKey, List<Token>, FileContent>() {
+      @Override
+      @NotNull
+      public Map<TokenIndexKey, List<Token>> map(@NotNull FileContent inputData) {
+        if (true) return Collections.EMPTY_MAP; // TODO: Eugene index is VERY unefficient and leads to OME
+        Map<TokenIndexKey, List<Token>> result = new HashMap<TokenIndexKey, List<Token>>(1);
+        RecursiveTokenizingVisitor visitor = new RecursiveTokenizingVisitor();
+        inputData.getPsiFile().accept(visitor);
+        List<Token> tokens = visitor.getTokens();
+        if (tokens.size() > 0) {
+          String path = inputData.getFile().getPath();
+          tokens.add(new PathMarkerToken(path));
+          TokenIndexKey key = new TokenIndexKey(visitor.getLanguages(), getBlockId(path));
+          result.put(key, tokens);
+        }
+        return result;
+      }
+    };
+  }
+
+  @NotNull
+  @Override
+  public KeyDescriptor<TokenIndexKey> getKeyDescriptor() {
+    return myKeyDescriptor;
+  }
+
+  @NotNull
+  @Override
+  public DataExternalizer<List<Token>> getValueExternalizer() {
+    return myDataExternalizer;
+  }
+
+  @NotNull
+  @Override
+  public FileBasedIndex.InputFilter getInputFilter() {
+    return new FileBasedIndex.InputFilter() {
+      @Override
+      public boolean acceptInput(@NotNull VirtualFile file) {
+        if (file.getFileSystem() instanceof JarFileSystem) return false;
+        return file.getFileType() instanceof LanguageFileType;
+      }
+    };
+  }
+
+  @Override
+  public boolean dependsOnFileContent() {
+    return true;
+  }
+
+  @Override
+  public int getVersion() {
+    return VERSION;
+  }
+
+  @Override
+  public int getCacheSize() {
+    return 1;
+  }
+
+  public static boolean supports(Language language) {
+    return StructuralSearchUtil.getTokenizerForLanguage(language) != null;
+  }
+}