summaryrefslogtreecommitdiff
path: root/platform/structuralsearch/source/com/intellij/tokenindex/TokenIndex.java
diff options
context:
space:
mode:
Diffstat (limited to 'platform/structuralsearch/source/com/intellij/tokenindex/TokenIndex.java')
-rw-r--r--platform/structuralsearch/source/com/intellij/tokenindex/TokenIndex.java187
1 files changed, 187 insertions, 0 deletions
diff --git a/platform/structuralsearch/source/com/intellij/tokenindex/TokenIndex.java b/platform/structuralsearch/source/com/intellij/tokenindex/TokenIndex.java
new file mode 100644
index 000000000000..4547092eb58f
--- /dev/null
+++ b/platform/structuralsearch/source/com/intellij/tokenindex/TokenIndex.java
@@ -0,0 +1,187 @@
+package com.intellij.tokenindex;
+
+import com.intellij.lang.Language;
+import com.intellij.openapi.fileTypes.LanguageFileType;
+import com.intellij.openapi.vfs.JarFileSystem;
+import com.intellij.openapi.vfs.VirtualFile;
+import com.intellij.structuralsearch.StructuralSearchUtil;
+import com.intellij.util.containers.HashMap;
+import com.intellij.util.indexing.*;
+import com.intellij.util.io.DataExternalizer;
+import com.intellij.util.io.KeyDescriptor;
+import org.jetbrains.annotations.NotNull;
+
+import java.io.DataInput;
+import java.io.DataOutput;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+import java.util.Map;
+
+/**
+ * File-based index extension that maps a {@link TokenIndexKey} to the list of {@link Token}s
+ * gathered from a file by {@link RecursiveTokenizingVisitor}.
+ *
+ * NOTE: indexing is currently switched off - the indexer returned by {@link #getIndexer()}
+ * yields an empty map for every input (see the TODO inside it).
+ *
+ * @author Eugene.Kudelevsky
+ */
+public class TokenIndex extends FileBasedIndexExtension<TokenIndexKey, List<Token>> {
+  /** Divisor used by {@link #getBlockId(String)}; block ids lie in {@code [0, FILE_BLOCK_SIZE)}. */
+  private static final int FILE_BLOCK_SIZE = 100;
+
+  public static final ID<TokenIndexKey, List<Token>> INDEX_ID = ID.create("token.index");
+
+  /** Value reported by {@link #getVersion()}. */
+  private static final int VERSION = 3;
+
+  private final KeyDescriptor<TokenIndexKey> myKeyDescriptor = new TokenIndexKeyDescriptor();
+
+  // Type tags written as the first byte of every serialized token.
+  private static final int ANONYM_TOKEN_ID = 0;
+  private static final int TEXT_TOKEN_ID = 1;
+  private static final int MARKER_TOKEN_ID = 2;
+  private static final int INDENT_TOKEN_ID = 3;
+
+  /**
+   * Serializer for token lists. Layout: an int element count, then for each token a one-byte
+   * type tag followed by its fields:
+   *   AnonymToken     - int start, int end, byte type
+   *   TextToken       - int start, int end, int hash
+   *   PathMarkerToken - UTF path
+   *   IndentToken     - int start, int end
+   */
+  private final DataExternalizer<List<Token>> myDataExternalizer = new DataExternalizer<List<Token>>() {
+    /**
+     * Writes the element count followed by every token of {@code value}.
+     *
+     * @throws IOException if the output fails or a token is of an unsupported class
+     */
+    @Override
+    public void save(@NotNull DataOutput out, List<Token> value) throws IOException {
+      out.writeInt(value.size());
+      for (Token token : value) {
+        if (token instanceof AnonymToken) {
+          out.writeByte(ANONYM_TOKEN_ID);
+          out.writeInt(token.getStart());
+          out.writeInt(token.getEnd());
+          out.writeByte(((AnonymToken)token).getType());
+        }
+        else if (token instanceof TextToken) {
+          out.writeByte(TEXT_TOKEN_ID);
+          out.writeInt(token.getStart());
+          out.writeInt(token.getEnd());
+          out.writeInt(((TextToken)token).getHash());
+        }
+        else if (token instanceof PathMarkerToken) {
+          out.writeByte(MARKER_TOKEN_ID);
+          out.writeUTF(((PathMarkerToken)token).getPath());
+        }
+        else if (token instanceof IndentToken) {
+          out.writeByte(INDENT_TOKEN_ID);
+          out.writeInt(token.getStart());
+          out.writeInt(token.getEnd());
+        }
+        else {
+          // Fail even when assertions are disabled: the element count is already written, so
+          // silently skipping a token would leave a record that read() cannot parse.
+          throw new IOException("Unsupported token type " + (token == null ? null : token.getClass()));
+        }
+      }
+    }
+
+    /**
+     * Reads a token list in the layout produced by {@link #save}.
+     *
+     * @throws IOException if the input fails or contains an unknown type tag
+     */
+    @Override
+    public List<Token> read(@NotNull DataInput in) throws IOException {
+      List<Token> result = new ArrayList<Token>();
+      int n = in.readInt();
+      for (int i = 0; i < n; i++) {
+        byte tokenTypeId = in.readByte();
+        switch (tokenTypeId) {
+          case ANONYM_TOKEN_ID: {
+            int start = in.readInt();
+            int end = in.readInt();
+            byte anonymTokenTypeValue = in.readByte();
+            result.add(new AnonymToken(anonymTokenTypeValue, start, end));
+            break;
+          }
+          case TEXT_TOKEN_ID: {
+            int start = in.readInt();
+            int end = in.readInt();
+            int hash = in.readInt();
+            result.add(new TextToken(hash, start, end));
+            break;
+          }
+          case MARKER_TOKEN_ID: {
+            String path = in.readUTF();
+            result.add(new PathMarkerToken(path));
+            break;
+          }
+          case INDENT_TOKEN_ID: {
+            int start = in.readInt();
+            int end = in.readInt();
+            result.add(new IndentToken(start, end));
+            break;
+          }
+          default:
+            // The payload length of an unknown tag is not known, so the rest of the stream
+            // cannot be interpreted; report the problem instead of reading garbage.
+            throw new IOException("Unsupported token type id " + tokenTypeId);
+        }
+      }
+      return result;
+    }
+  };
+
+  @NotNull
+  @Override
+  public ID<TokenIndexKey, List<Token>> getName() {
+    return INDEX_ID;
+  }
+
+  /**
+   * Derives a block id in {@code [0, FILE_BLOCK_SIZE)} from the hash code of {@code filePath}.
+   */
+  private static int getBlockId(String filePath) {
+    // Take the remainder before the absolute value: negating Integer.MIN_VALUE overflows and
+    // stays negative, whereas the remainder always lies in (-FILE_BLOCK_SIZE, FILE_BLOCK_SIZE).
+    // For every other hash code this equals |hash| % FILE_BLOCK_SIZE.
+    return Math.abs(filePath.hashCode() % FILE_BLOCK_SIZE);
+  }
+
+  /**
+   * Returns the indexer. It currently short-circuits to an empty map for every file; the code
+   * after the early return is kept for when the index is enabled again.
+   */
+  @NotNull
+  @Override
+  public DataIndexer<TokenIndexKey, List<Token>, FileContent> getIndexer() {
+    return new DataIndexer<TokenIndexKey, List<Token>, FileContent>() {
+      @Override
+      @NotNull
+      public Map<TokenIndexKey, List<Token>> map(@NotNull FileContent inputData) {
+        // TODO: Eugene index is VERY inefficient and leads to OOME
+        if (true) return Collections.<TokenIndexKey, List<Token>>emptyMap();
+        Map<TokenIndexKey, List<Token>> result = new HashMap<TokenIndexKey, List<Token>>(1);
+        RecursiveTokenizingVisitor visitor = new RecursiveTokenizingVisitor();
+        inputData.getPsiFile().accept(visitor);
+        List<Token> tokens = visitor.getTokens();
+        if (!tokens.isEmpty()) {
+          String path = inputData.getFile().getPath();
+          // The file path is appended to the token list as a trailing marker token.
+          tokens.add(new PathMarkerToken(path));
+          TokenIndexKey key = new TokenIndexKey(visitor.getLanguages(), getBlockId(path));
+          result.put(key, tokens);
+        }
+        return result;
+      }
+    };
+  }
+
+  @NotNull
+  @Override
+  public KeyDescriptor<TokenIndexKey> getKeyDescriptor() {
+    return myKeyDescriptor;
+  }
+
+  @NotNull
+  @Override
+  public DataExternalizer<List<Token>> getValueExternalizer() {
+    return myDataExternalizer;
+  }
+
+  /**
+   * Returns a filter that rejects files located in a {@link JarFileSystem} and accepts only
+   * files whose file type is a {@link LanguageFileType}.
+   */
+  @NotNull
+  @Override
+  public FileBasedIndex.InputFilter getInputFilter() {
+    return new FileBasedIndex.InputFilter() {
+      @Override
+      public boolean acceptInput(@NotNull VirtualFile file) {
+        if (file.getFileSystem() instanceof JarFileSystem) return false;
+        return file.getFileType() instanceof LanguageFileType;
+      }
+    };
+  }
+
+  @Override
+  public boolean dependsOnFileContent() {
+    return true;
+  }
+
+  @Override
+  public int getVersion() {
+    return VERSION;
+  }
+
+  @Override
+  public int getCacheSize() {
+    return 1;
+  }
+
+  /**
+   * Tells whether {@link StructuralSearchUtil#getTokenizerForLanguage} yields a tokenizer
+   * (a non-null result) for {@code language}.
+   */
+  public static boolean supports(Language language) {
+    return StructuralSearchUtil.getTokenizerForLanguage(language) != null;
+  }
+}