Source code for dgenerate.translators.argos

# Copyright (c) 2023, Teriks
#
# dgenerate is distributed under the following BSD 3-Clause License
#
# Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
#
# 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
#
# 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in
#    the documentation and/or other materials provided with the distribution.
#
# 3. Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
# ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
import os.path
import urllib.request

import dgenerate.extras.argostranslate.package as argostranslate_package
import dgenerate.extras.argostranslate.settings as argostranslate_settings

import dgenerate.filelock
import dgenerate.messages as _messages
import dgenerate.translators.exceptions as _exceptions
import dgenerate.translators.util as _util
import dgenerate.spacycache as _spacycache


[docs] class ArgosTranslator: """ Translate languages locally on the CPU using argostranslate models. Supports automatic pivot language selection. """ _offline_mode = False
[docs] def __init__(self, from_lang: str, to_lang: str, local_files_only: bool = False): """ :param from_lang: From language code (IETF), or language name. :param to_lang: To language code (IETF), or language name. :param local_files_only: Only use models that have been previously cached? :raise dgenerate.translators.TranslatorLoadError: If models cannot be loaded / found. """ norm_from_lang = _util.get_language_code(from_lang) if norm_from_lang is None: raise _exceptions.TranslatorLoadError( f'Invalid "from" language / language code: {from_lang}') norm_to_lang = _util.get_language_code(to_lang) if norm_to_lang is None: raise _exceptions.TranslatorLoadError( f'Invalid "to" language / language code: {to_lang}') from_lang = norm_from_lang to_lang = norm_to_lang # search by priority pivot_lang_codes = ['en', 'es', 'fr', 'de', 'it', 'pt'] # argostranslate is not multiprocess safe. # at least make it so between dgenerate processes. argostranslate_settings.cache_dir.mkdir(parents=True, exist_ok=True) with dgenerate.filelock.temp_file_lock( argostranslate_settings.cache_dir / '_dgenerate.lock' ): # download package index if we do not have it, and the user allows it if not os.path.exists(argostranslate_settings.local_package_index): if not (local_files_only or self._offline_mode): _messages.debug_log('argostranslate, updating package index...') self._argos_update_package_index() else: raise _exceptions.TranslatorLoadError( 'argostranslate needs to download a model index ' 'to search for available translation models, but you are in offline mode.') # what is available? (in the index) available_packages = argostranslate_package.get_available_packages() # Try to find this initially package_to_install = None # pivot lang package will go here if required pivot_lang = None try: # search for suitable model (package) package_to_install = next( filter( lambda x: x.from_code == from_lang and x.to_code == to_lang, available_packages ) ) except StopIteration: # did not find anything, lets look for a pivot for code in pivot_lang_codes: try: pivot_lang = next( filter( lambda x: x.from_code == from_lang and x.to_code == code, available_packages ) ) # pivot found break except StopIteration: # keep looking continue if pivot_lang is not None: # we located a suitable pivot model pivot_model_path = self._argos_model_path(pivot_lang) # if it does not exist on disk, and we are not in offline mode, # then download the model if not os.path.exists(pivot_model_path): if local_files_only or self._offline_mode: raise _exceptions.TranslatorLoadError( f'argostranslate needs to download a pivot model ' f'for: "{pivot_lang.from_code}" -> "{pivot_lang.to_code}", ' f'but offline mode is active.') argostranslate_package.install_from_path( self._argos_download_package(pivot_lang) ) # check that we can translate from the pivot language to the desired language. # if we cannot, then this will throw StopIteration try: package_to_install = next( filter( lambda x: x.from_code == pivot_lang.to_code and x.to_code == to_lang, available_packages ) ) except StopIteration: raise _exceptions.TranslatorLoadError( f'argostranslate translation model for "{from_lang}" -> "{to_lang}" is not available.') # This model will be used to translate from the pivot language to the target language model_path = self._argos_model_path(package_to_install) # If that model does not exist, download it if the user allows if not os.path.exists(model_path): if local_files_only or self._offline_mode: raise _exceptions.TranslatorLoadError( f'argostranslate needs to download a model ' f'for: "{package_to_install.from_code}" -> "{package_to_install.to_code}", ' f'but offline mode is active.') argostranslate_package.install_from_path( self._argos_download_package(package_to_install) ) elif package_to_install is not None: # we do not need a pivot model and can translate directly. model_path = self._argos_model_path(package_to_install) # download the model if it does not exist if not os.path.exists(model_path): if local_files_only or self._offline_mode: raise _exceptions.TranslatorLoadError( f'argostranslate needs to download a model ' f'for: "{package_to_install.from_code}" -> "{package_to_install.to_code}", ' f'but offline mode is active.') argostranslate_package.install_from_path( self._argos_download_package(package_to_install) ) else: # could not find anything raise _exceptions.TranslatorLoadError( f'argostranslate translation model for "{from_lang}" -> "{to_lang}" is not available.') self._translation = None self._translation2 = None try: with _spacycache.offline_mode_context(local_files_only): # this trys to download to the spacy cache if the models # do not exist, we want it to throw if we are in offline mode import dgenerate.extras.argostranslate.translate as argostranslate_translate except _spacycache.SpacyModelNotFoundError as e: raise _exceptions.TranslatorLoadError( 'Unable to load argostranslate model due to being ' 'unable to download required SpaCy model with offline mode active.') from e if pivot_lang: _messages.debug_log( f'argostranslate: using pivot {from_lang} -> {pivot_lang.to_code} then {pivot_lang.to_code} -> {to_lang}') # first step, translate to pivot language self._translation = argostranslate_translate.get_translation_from_codes(from_lang, pivot_lang.to_code) # second step, translate from pivot language to target language self._translation2 = argostranslate_translate.get_translation_from_codes(pivot_lang.to_code, to_lang) else: # directly translate self._translation = argostranslate_translate.get_translation_from_codes(from_lang, to_lang)
@staticmethod def _argos_update_package_index(): try: response = urllib.request.urlopen(argostranslate_settings.remote_package_index) except Exception as e: # They eat this exception and then log it without a re-throw, I need to handle it. raise _exceptions.TranslatorLoadError( f'Unable to download argostranslate package index, network error.') from e data = response.read() with open(argostranslate_settings.local_package_index, "wb") as f: f.write(data) @staticmethod def _argos_download_package(package: argostranslate_package.AvailablePackage): try: # this actually just throws "Exception" upon download failure. return package.download() except Exception as e: raise _exceptions.TranslatorLoadError( f'Unable to download argostranslate model: {package.from_code} -> {package.to_code}, network error.') from e @staticmethod def _argos_model_path(package: argostranslate_package.IPackage): # see the source code of package.download() return argostranslate_settings.downloads_dir / ( argostranslate_package.argospm_package_name(package) + '.argosmodel')
[docs] def translate(self, texts: str | list[str]) -> list[str]: """ Translate a list of texts. :param texts: Texts to translate. :return: Translated texts. """ if isinstance(texts, str): texts = [texts] output = [] for text in texts: try: if self._translation2: output.append(self._translation2.translate(self._translation.translate(text))) else: output.append(self._translation.translate(text)) except Exception as e: raise _exceptions.TranslationError(e) from e return output