aboutsummaryrefslogtreecommitdiff
path: root/icing/monkey_test/monkey-test-common-words.h
blob: f0ed08a65cadd3bd87f20119647531ffa5872cf9 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
// Copyright (C) 2022 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#ifndef ICING_MONKEY_TEST_MONKEY_TEST_COMMON_WORDS_H_
#define ICING_MONKEY_TEST_MONKEY_TEST_COMMON_WORDS_H_

#include <array>
#include <string_view>

namespace icing {
namespace lib {

// A bag of words in English for creating random documents. Only words that are
// at least 3 letters long are included (that's kPrefixLength) so that prefix
// queries are easily formed from any word in a random document.
// Data source:
// https://chromium.googlesource.com/chromium/src/+/HEAD/components/url_formatter/spoof_checks/common_words/data/
static constexpr std::array<std::string_view, 1000> kCommonWords = {
    "the",          "and",           "for",           "that",
    "this",         "with",          "you",           "not",
    "are",          "from",          "your",          "all",
    "have",         "new",           "more",          "was",
    "will",         "home",          "can",           "about",
    "page",         "has",           "search",        "free",
    "but",          "our",           "one",           "other",
    "information",  "time",          "they",          "site",
    "may",          "what",          "which",         "their",
    "news",         "out",           "use",           "any",
    "there",        "see",           "only",          "his",
    "when",         "contact",       "here",          "business",
    "who",          "web",           "also",          "now",
    "help",         "get",           "view",          "online",
    "first",        "been",          "would",         "how",
    "were",         "services",      "some",          "these",
    "click",        "its",           "like",          "service",
    "than",         "find",          "price",         "date",
    "back",         "top",           "people",        "had",
    "list",         "name",          "just",          "over",
    "state",        "year",          "day",           "into",
    "email",        "two",           "health",        "world",
    "next",         "used",          "work",          "last",
    "most",         "products",      "music",         "buy",
    "data",         "make",          "them",          "should",
    "product",      "system",        "post",          "her",
    "city",         "add",           "policy",        "number",
    "such",         "please",        "available",     "copyright",
    "support",      "message",       "after",         "best",
    "software",     "then",          "jan",           "good",
    "video",        "well",          "where",         "info",
    "rights",       "public",        "books",         "high",
    "school",       "through",       "each",          "links",
    "she",          "review",        "years",         "order",
    "very",         "privacy",       "book",          "items",
    "company",      "read",          "group",         "sex",
    "need",         "many",          "user",          "said",
    "does",         "set",           "under",         "general",
    "research",     "university",    "january",       "mail",
    "full",         "map",           "reviews",       "program",
    "life",         "know",          "games",         "way",
    "days",         "management",    "part",          "could",
    "great",        "united",        "hotel",         "real",
    "item",         "international", "center",        "must",
    "store",        "travel",        "comments",      "made",
    "development",  "report",        "off",           "member",
    "details",      "line",          "terms",         "before",
    "hotels",       "did",           "send",          "right",
    "type",         "because",       "local",         "those",
    "using",        "results",       "office",        "education",
    "national",     "car",           "design",        "take",
    "posted",       "internet",      "address",       "community",
    "within",       "states",        "area",          "want",
    "phone",        "dvd",           "shipping",      "reserved",
    "subject",      "between",       "forum",         "family",
    "long",         "based",         "code",          "show",
    "even",         "black",         "check",         "special",
    "prices",       "website",       "index",         "being",
    "women",        "much",          "sign",          "file",
    "link",         "open",          "today",         "technology",
    "south",        "case",          "project",       "same",
    "pages",        "version",       "section",       "own",
    "found",        "sports",        "house",         "related",
    "security",     "both",          "county",        "american",
    "photo",        "game",          "members",       "power",
    "while",        "care",          "network",       "down",
    "computer",     "systems",       "three",         "total",
    "place",        "end",           "following",     "download",
    "him",          "without",       "per",           "access",
    "think",        "north",         "resources",     "current",
    "posts",        "big",           "media",         "law",
    "control",      "water",         "history",       "pictures",
    "size",         "art",           "personal",      "since",
    "including",    "guide",         "shop",          "directory",
    "board",        "location",      "change",        "white",
    "text",         "small",         "rating",        "rate",
    "government",   "children",      "during",        "usa",
    "return",       "students",      "shopping",      "account",
    "times",        "sites",         "level",         "digital",
    "profile",      "previous",      "form",          "events",
    "love",         "old",           "john",          "main",
    "call",         "hours",         "image",         "department",
    "title",        "description",   "non",           "insurance",
    "another",      "why",           "shall",         "property",
    "class",        "still",         "money",         "quality",
    "every",        "listing",       "content",       "country",
    "private",      "little",        "visit",         "save",
    "tools",        "low",           "reply",         "customer",
    "december",     "compare",       "movies",        "include",
    "college",      "value",         "article",       "york",
    "man",          "card",          "jobs",          "provide",
    "food",         "source",        "author",        "different",
    "press",        "learn",         "sale",          "around",
    "print",        "course",        "job",           "canada",
    "process",      "teen",          "room",          "stock",
    "training",     "too",           "credit",        "point",
    "join",         "science",       "men",           "categories",
    "advanced",     "west",          "sales",         "look",
    "english",      "left",          "team",          "estate",
    "box",          "conditions",    "select",        "windows",
    "photos",       "gay",           "thread",        "week",
    "category",     "note",          "live",          "large",
    "gallery",      "table",         "register",      "however",
    "june",         "october",       "november",      "market",
    "library",      "really",        "action",        "start",
    "series",       "model",         "features",      "air",
    "industry",     "plan",          "human",         "provided",
    "yes",          "required",      "second",        "hot",
    "accessories",  "cost",          "movie",         "forums",
    "march",        "september",     "better",        "say",
    "questions",    "july",          "going",         "medical",
    "test",         "friend",        "come",          "dec",
    "server",       "study",         "application",   "cart",
    "staff",        "articles",      "san",           "feedback",
    "again",        "play",          "looking",       "issues",
    "april",        "never",         "users",         "complete",
    "street",       "topic",         "comment",       "financial",
    "things",       "working",       "against",       "standard",
    "tax",          "person",        "below",         "mobile",
    "less",         "got",           "blog",          "party",
    "payment",      "equipment",     "login",         "student",
    "let",          "programs",      "offers",        "legal",
    "above",        "recent",        "park",          "stores",
    "side",         "act",           "problem",       "red",
    "give",         "memory",        "performance",   "social",
    "august",       "quote",         "language",      "story",
    "sell",         "options",       "experience",    "rates",
    "create",       "key",           "body",          "young",
    "america",      "important",     "field",         "few",
    "east",         "paper",         "single",        "age",
    "activities",   "club",          "example",       "girls",
    "additional",   "password",      "latest",        "something",
    "road",         "gift",          "question",      "changes",
    "night",        "hard",          "texas",         "oct",
    "pay",          "four",          "poker",         "status",
    "browse",       "issue",         "range",         "building",
    "seller",       "court",         "february",      "always",
    "result",       "audio",         "light",         "write",
    "war",          "nov",           "offer",         "blue",
    "groups",       "easy",          "given",         "files",
    "event",        "release",       "analysis",      "request",
    "fax",          "china",         "making",        "picture",
    "needs",        "possible",      "might",         "professional",
    "yet",          "month",         "major",         "star",
    "areas",        "future",        "space",         "committee",
    "hand",         "sun",           "cards",         "problems",
    "london",       "washington",    "meeting",       "rss",
    "become",       "interest",      "child",         "keep",
    "enter",        "california",    "porn",          "share",
    "similar",      "garden",        "schools",       "million",
    "added",        "reference",     "companies",     "listed",
    "baby",         "learning",      "energy",        "run",
    "delivery",     "net",           "popular",       "term",
    "film",         "stories",       "put",           "computers",
    "journal",      "reports",       "try",           "welcome",
    "central",      "images",        "president",     "notice",
    "god",          "original",      "head",          "radio",
    "until",        "cell",          "color",         "self",
    "council",      "away",          "includes",      "track",
    "australia",    "discussion",    "archive",       "once",
    "others",       "entertainment", "agreement",     "format",
    "least",        "society",       "months",        "log",
    "safety",       "friends",       "sure",          "faq",
    "trade",        "edition",       "cars",          "messages",
    "marketing",    "tell",          "further",       "updated",
    "association",  "able",          "having",        "provides",
    "david",        "fun",           "already",       "green",
    "studies",      "close",         "common",        "drive",
    "specific",     "several",       "gold",          "feb",
    "living",       "sep",           "collection",    "called",
    "short",        "arts",          "lot",           "ask",
    "display",      "limited",       "powered",       "solutions",
    "means",        "director",      "daily",         "beach",
    "past",         "natural",       "whether",       "due",
    "electronics",  "five",          "upon",          "period",
    "planning",     "database",      "says",          "official",
    "weather",      "mar",           "land",          "average",
    "done",         "technical",     "window",        "france",
    "pro",          "region",        "island",        "record",
    "direct",       "conference",    "environment",   "records",
    "district",     "calendar",      "costs",         "style",
    "url",          "front",         "statement",     "update",
    "parts",        "aug",           "ever",          "downloads",
    "early",        "miles",         "sound",         "resource",
    "present",      "applications",  "either",        "ago",
    "document",     "word",          "works",         "material",
    "bill",         "apr",           "written",       "talk",
    "federal",      "hosting",       "rules",         "final",
    "adult",        "tickets",       "thing",         "centre",
    "requirements", "via",           "cheap",         "nude",
    "kids",         "finance",       "true",          "minutes",
    "else",         "mark",          "third",         "rock",
    "gifts",        "europe",        "reading",       "topics",
    "bad",          "individual",    "tips",          "plus",
    "auto",         "cover",         "usually",       "edit",
    "together",     "videos",        "percent",       "fast",
    "function",     "fact",          "unit",          "getting",
    "global",       "tech",          "meet",          "far",
    "economic",     "player",        "projects",      "lyrics",
    "often",        "subscribe",     "submit",        "germany",
    "amount",       "watch",         "included",      "feel",
    "though",       "bank",          "risk",          "thanks",
    "everything",   "deals",         "various",       "words",
    "linux",        "jul",           "production",    "commercial",
    "james",        "weight",        "town",          "heart",
    "advertising",  "received",      "choose",        "treatment",
    "newsletter",   "archives",      "points",        "knowledge",
    "magazine",     "error",         "camera",        "jun",
    "girl",         "currently",     "construction",  "toys",
    "registered",   "clear",         "golf",          "receive",
    "domain",       "methods",       "chapter",       "makes",
    "protection",   "policies",      "loan",          "wide",
    "beauty",       "manager",       "india",         "position",
    "taken",        "sort",          "listings",      "models",
    "michael",      "known",         "half",          "cases",
    "step",         "engineering",   "florida",       "simple",
    "quick",        "none",          "wireless",      "license",
    "paul",         "friday",        "lake",          "whole",
    "annual",       "published",     "later",         "basic",
    "shows",        "corporate",     "church",        "method",
    "purchase",     "customers",     "active",        "response",
    "practice",     "hardware",      "figure",        "materials",
    "fire",         "holiday",       "chat",          "enough",
    "designed",     "along",         "among",         "death",
    "writing",      "speed",         "html",          "countries",
    "loss",         "face",          "brand",         "discount",
    "higher",       "effects",       "created",       "remember",
    "standards",    "oil",           "bit",           "yellow",
    "political",    "increase",      "advertise",     "kingdom",
    "base",         "near",          "environmental", "thought",
    "stuff",        "french",        "storage",       "japan",
    "doing",        "loans",         "shoes",         "entry",
    "stay",         "nature",        "orders",        "availability",
    "africa",       "summary",       "turn",          "mean",
    "growth",       "notes",         "agency",        "king",
    "monday",       "european",      "activity",      "copy",
    "although",     "drug",          "pics",          "western",
    "income",       "force",         "cash",          "employment",
    "overall",      "bay",           "river",         "commission",
    "package",      "contents",      "seen",          "players",
    "engine",       "port",          "album",         "regional",
    "stop",         "supplies",      "started",       "administration",
    "bar",          "institute",     "views",         "plans",
    "double",       "dog",           "build",         "screen",
    "exchange",     "types",         "soon",          "sponsored",
    "lines",        "electronic",    "continue",      "across",
    "benefits",     "needed",        "season",        "apply",
    "someone",      "held",          "anything",      "printer",
    "condition",    "effective",     "believe",       "organization",
    "effect",       "asked",         "eur",           "mind"};

}  // namespace lib
}  // namespace icing

#endif  // ICING_MONKEY_TEST_MONKEY_TEST_COMMON_WORDS_H_