# tools/refactoring/stringmanipulation.py


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303

import string
import sys

# Removes the first occurrence of match_string from remove_string. The search
# ignores case; the characters that remain keep their original case.
# Returns a two-element list [success, updated_string]. success is False and
# the string is returned unchanged when match_string does not occur.
def removefirstoccurance(remove_string, match_string):
    needle = match_string.lower()
    start = remove_string.lower().find(needle)
    if start < 0:
        return [False, remove_string]
    # Cut out the matched span and glue the two remaining pieces together.
    end = start + len(needle)
    return [True, remove_string[:start] + remove_string[end:]]

# Returns remove_string with every occurrence of match_string removed. The
# search ignores case. Removal is repeated until no occurrence is left, so a
# match that only appears once an earlier one has been cut out is removed too
# (e.g. removing 'ab' from 'aabb' yields '').
# An empty match_string is found everywhere but removes nothing, so the string
# is returned unchanged instead of looping forever.
def removealloccurances(remove_string, match_string):
    needle = match_string.lower()
    if not needle:
        return remove_string
    while True:
        start = remove_string.lower().find(needle)
        if start == -1:
            return remove_string
        # Drop the matched span; each pass shortens the string, so the loop
        # is guaranteed to terminate.
        remove_string = (remove_string[:start] +
                         remove_string[start + len(needle):])

# Removes match_string from the start of remove_string, ignoring case.
# Returns a two-element list [success, new_string]. success is False and the
# string is returned unchanged when it does not start with match_string.
def removeprefix(remove_string, match_string):
    prefix = match_string.lower()
    if not remove_string.lower().startswith(prefix):
        return [False, remove_string]
    return [True, remove_string[len(prefix):]]

# Removes match_string from the start of remove_string for as long as the
# string keeps starting with it. The comparison ignores case.
# An empty match_string is a prefix of everything but removes nothing, so the
# string is returned unchanged instead of looping forever.
def removeallprefix(remove_string, match_string):
    prefix = match_string.lower()
    if not prefix:
        return remove_string
    while remove_string.lower().startswith(prefix):
        remove_string = remove_string[len(prefix):]
    return remove_string

# Returns True if extensionstring looks like a file extension: a leading '.'
# followed by at least one character, none of which is another '.'.
def isextension(extensionstring):
    if len(extensionstring) < 2:
        return False
    if extensionstring[0] != '.':
        return False
    # Inspect everything after the leading dot, including the last character
    # (a slice end is exclusive, so it must not be reduced by one).
    return '.' not in extensionstring[1:]

# Returns the index in original_string at which the last occurrence of
# match_string starts, or -1 if match_string does not occur. The comparison is
# case-sensitive. An empty match_string yields len(original_string).
def findlastoccurance(original_string, match_string):
    # str.rfind() searches from the right and reports an absolute index.
    return original_string.rfind(match_string)

# Changes the extension of original_string from original_extension to
# new_extension: everything from the last occurrence of original_extension
# onwards is dropped and new_extension is appended in its place.
# original_string is returned unchanged when either extension is rejected by
# isextension() or original_extension is not found.
def changeextension(original_string, original_extension, new_extension):
    if not (isextension(original_extension) and isextension(new_extension)):
        return original_string
    cut_at = findlastoccurance(original_string, original_extension)
    if cut_at == -1:
        return original_string
    return original_string[:cut_at] + new_extension

# Returns the index of the first capital letter, i.e. the first character that
# changes when it is lowercased, or -1 if there is none.
# Done with a manual scan; the original author could not get str.find to work
# for this.
def findfirstcapitalletter(original_string):
    for position, character in enumerate(original_string):
        if character != character.lower():
            return position
    return -1


# Replaces every capital letter with an underscore followed by the lower-case
# letter, e.g. 'HelloWorld' becomes 'hello_world'. The very first character is
# only lowercased; it never gets an underscore in front of it.
# An empty string is returned unchanged.
def lowercasewithunderscore(original_string):
    if not original_string:
        return original_string
    # Single pass: the first character is handled separately, every later
    # character that changes when lowercased is a capital.
    converted = [original_string[0].lower()]
    for character in original_string[1:]:
        lowered = character.lower()
        if lowered != character:
            converted.append('_' + lowered)
        else:
            converted.append(character)
    return ''.join(converted)

# my_table is a list of [old_string, new_string] pairs. Returns a new list
# that holds each distinct pair once, in order of first appearance.
# If two pairs agree in only one of their strings the table is inconsistent:
# both pairs are printed and the program exits with status 1. The one
# exception is an empty new_string, which different old strings may share.
def removeduplicates(my_table):
    new_table = []
    for old_string1, new_string1 in my_table:
        is_duplicate = False
        for old_string2, new_string2 in new_table:
            same_old = (old_string1 == old_string2)
            same_new = (new_string1 == new_string2)
            if same_old and same_new:
                is_duplicate = True
                break
            # Exactly one side matches: the same old string maps to two
            # different new strings, or two old strings map to the same
            # non-empty new string.
            if same_old or (same_new and new_string1 != ''):
                # Single-argument print() calls behave the same on Python 2
                # and Python 3.
                print('missmatching set, terminating program')
                print(old_string1)
                print(new_string1)
                print(old_string2)
                print(new_string2)
                sys.exit(1)
        if not is_duplicate:
            new_table.append([old_string1, new_string1])
    return new_table

# Returns a new list with the [old_string, new_string] pairs of my_table whose
# two strings differ; pairs that would not change anything are left out.
def removenochange(my_table):
    return [[old_string, new_string]
            for old_string, new_string in my_table
            if old_string != new_string]

# Orders the table by the length of the old string (the first element of each
# entry), longest first. This lets bigger strings be replaced before the
# smaller strings they contain. E.g. GIPS is a substring of GIPSVE; if GIPS is
# removed first, GIPSVE will never be removed.
# The list is sorted in place and also returned. Entries whose old strings are
# equally long keep their relative order, and empty old strings sort last.
def ordertablesizefirst(my_table):
    my_table.sort(key=lambda entry: len(entry[0]), reverse=True)
    return my_table

# Checks, ignoring case, whether the shorter of the two strings occurs inside
# the longer one (assuming neither contains whitespace).
# Returns the index in the longer string at which the shorter one starts, or
# -1 if there is no match or either string is empty. A match at the very start
# yields 0, so compare the result against -1 instead of testing its truth
# value.
def issubstring(string1, string2):
    if not string1 or not string2:
        return -1
    # When both have the same length string1 acts as the longer string.
    if len(string1) < len(string2):
        haystack, needle = string2, string1
    else:
        haystack, needle = string1, string2
    needle_lower = needle.lower()
    width = len(needle)
    for start in range(len(haystack)):
        if haystack[start:start + width].lower() == needle_lower:
            return start
    return -1

#not_part_of_word_table = [' ','(',')','{','}',':','\t','*','&','/','[',']','.',',','\n']
#def ispartofword( char ):
#    for item in not_part_of_word_table:
#        if(char == item):
#            return False
#    return True

# Returns True if char can be part of a word: it must be a letter, a digit or
# an underscore.
def ispartofword(char):
    # isalnum() already covers everything isalpha() accepts.
    return char.isalnum() or char == '_'

# Finds the word (a run of characters accepted by ispartofword) that
# line[current_index] belongs to. Trailing whitespace in line is ignored.
# Returns [start_index, length] of that word, or [] when current_index is out
# of range or does not point at a word character (e.g. a space, tab or
# bracket).
def getword(line, current_index):
    if current_index < 0:
        return []
    line = line.rstrip()
    if current_index >= len(line):
        return []
    # Without this guard a non-word character would yield a word of size -1.
    if not ispartofword(line[current_index]):
        return []
    # Walk left while the preceding character still belongs to the word.
    start_pos = current_index
    while start_pos > 0 and ispartofword(line[start_pos - 1]):
        start_pos -= 1
    # Walk right to the first character past the end of the word.
    end_pos = current_index + 1
    while end_pos < len(line) and ispartofword(line[end_pos]):
        end_pos += 1
    return [start_pos, end_pos - start_pos]

# Returns the entries of my_table whose first string is not listed in
# complement_to_table. my_table holds [string1, string2] entries,
# complement_to_table is a plain list of strings; the comparison ignores case.
def complement(my_table, complement_to_table):
    return [entry for entry in my_table
            if not any(entry[0].lower() == compare_string.lower()
                       for compare_string in complement_to_table)]

# Strips every leading repetition of remove_string from line and returns the
# rest (case-sensitive). An empty remove_string leaves line unchanged.
def removestringfromhead(line, remove_string):
    if not remove_string:
        return line
    start = 0
    # Skip whole copies of remove_string rather than single characters.
    while line.startswith(remove_string, start):
        start += len(remove_string)
    return line[start:]

# Returns the part of line that precedes the first '//', i.e. the line with a
# trailing C++-style comment cut off. A line without '//' is returned
# unchanged.
def removeccomment(line):
    return line.partition('//')[0]

# Collapses every run of whitespace in line into a single space and drops
# leading and trailing whitespace.
def whitespacestoonespace(line):
    words = line.split()
    return ' '.join(words)

# Rewrites runs of capitals so that only the first letter of each word stays
# capitalized, e.g. 'GIPSVEFoo' becomes 'GipsveFoo'.
# A character counts as a capital when it equals its own upper-case form. A
# capital that directly follows another capital is lowercased; when a
# lower-case letter then follows, the letter before it is turned back into a
# capital because it starts the next word. Underscores and digits are copied
# unchanged and end a run of capitals. The first character is never changed.
# An empty string is returned unchanged.
def fixabbreviations(original_string):
    if not original_string:
        return original_string
    new_string = original_string[0]
    previous_was_capital = (original_string[0].upper() == original_string[0])
    for letter in original_string[1:]:
        if letter == '_' or letter.isdigit():
            new_string += letter
            previous_was_capital = False
            continue
        current_is_capital = (letter.upper() == letter)
        if previous_was_capital:
            if current_is_capital:
                # Still inside a run of capitals: flatten it.
                letter = letter.lower()
            else:
                # The run just ended; its last letter begins a new word, so
                # restore its capital.
                new_string = new_string[:-1] + new_string[-1].upper()
        previous_was_capital = current_is_capital
        new_string += letter
    return new_string

# Returns old_string with every non-overlapping occurrence of replace_string
# (searched left to right, case-sensitive) replaced by replace_with_string.
# An empty replace_string leaves old_string unchanged.
def replaceoccurances(old_string, replace_string, replace_with_string):
    # str.replace() would insert replace_with_string between all characters
    # for an empty pattern, so keep treating that as "nothing to replace".
    if not replace_string:
        return old_string
    return old_string.replace(replace_string, replace_with_string)