aboutsummaryrefslogtreecommitdiff
path: root/tensorflow_lite_support/ios/text/tokenizers/Sources/TFLSentencepieceTokenizer.mm
diff options
context:
space:
mode:
Diffstat (limited to 'tensorflow_lite_support/ios/text/tokenizers/Sources/TFLSentencepieceTokenizer.mm')
-rw-r--r--tensorflow_lite_support/ios/text/tokenizers/Sources/TFLSentencepieceTokenizer.mm45
1 files changed, 45 insertions, 0 deletions
diff --git a/tensorflow_lite_support/ios/text/tokenizers/Sources/TFLSentencepieceTokenizer.mm b/tensorflow_lite_support/ios/text/tokenizers/Sources/TFLSentencepieceTokenizer.mm
new file mode 100644
index 00000000..1e21cee5
--- /dev/null
+++ b/tensorflow_lite_support/ios/text/tokenizers/Sources/TFLSentencepieceTokenizer.mm
@@ -0,0 +1,45 @@
+/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#import "third_party/tensorflow_lite_support/ios/text/tokenizers/Sources/TFLSentencepieceTokenizer.h"
+#include "third_party/absl/memory/memory.h"
+#include "third_party/tensorflow_lite_support/cc/text/tokenizers/sentencepiece_tokenizer.h"
+#import "third_party/tensorflow_lite_support/ios/text/tokenizers/Sources/TFLTokenizerUtil.h"
+#import "third_party/tensorflow_lite_support/ios/utils/Sources/TFLStringUtil.h"
+
+NS_ASSUME_NONNULL_BEGIN
+// File-local shorthand for the C++ SentencePiece tokenizer class that the
+// Objective-C implementation below owns and forwards to.
+using SentencepieceTokenizerCPP = ::tflite::support::text::tokenizer::SentencePieceTokenizer;
+
+@implementation TFLSentencepieceTokenizer {
+  // C++ tokenizer instance owned by this object; the tokenizing and id-lookup
+  // methods below pass its raw pointer to the shared helper functions.
+  std::unique_ptr<SentencepieceTokenizerCPP> _spTokenizer;
+}
+
+// Initializes the wrapper and constructs the underlying C++ tokenizer from
+// `modelPath` (converted with `MakeString`). Returns whatever `[super init]`
+// produced; the C++ tokenizer is only created when that result is non-nil.
+- (instancetype)initWithModelPath:(NSString *)modelPath {
+  if ((self = [super init])) {
+    const auto convertedModelPath = MakeString(modelPath);
+    _spTokenizer = absl::make_unique<SentencepieceTokenizerCPP>(convertedModelPath);
+  }
+  return self;
+}
+
+// Returns the result of running the `Tokenize` helper on `input` with the
+// owned C++ tokenizer.
+- (NSArray<NSString *> *)tokensFromInput:(NSString *)input {
+  auto *cppTokenizer = _spTokenizer.get();
+  return Tokenize(cppTokenizer, input);
+}
+
+// Returns the result of running the `ConvertTokensToIds` helper on `tokens`
+// with the owned C++ tokenizer.
+- (NSArray<NSNumber *> *)idsFromTokens:(NSArray<NSString *> *)tokens {
+  auto *cppTokenizer = _spTokenizer.get();
+  return ConvertTokensToIds(cppTokenizer, tokens);
+}
+
+@end
+NS_ASSUME_NONNULL_END