From af1a266670d040d2f4083ff309d732d648afba2a Mon Sep 17 00:00:00 2001 From: Angelos Mouzakitis Date: Tue, 10 Oct 2023 14:33:42 +0000 Subject: Add submodule dependency files Change-Id: Iaf8d18082d3991dec7c0ebbea540f092188eb4ec --- .../Source/Python/UPT/Library/UniClassObject.py | 1074 ++++++++++++++++++++ 1 file changed, 1074 insertions(+) create mode 100644 roms/edk2/BaseTools/Source/Python/UPT/Library/UniClassObject.py (limited to 'roms/edk2/BaseTools/Source/Python/UPT/Library/UniClassObject.py') diff --git a/roms/edk2/BaseTools/Source/Python/UPT/Library/UniClassObject.py b/roms/edk2/BaseTools/Source/Python/UPT/Library/UniClassObject.py new file mode 100644 index 000000000..d25f30014 --- /dev/null +++ b/roms/edk2/BaseTools/Source/Python/UPT/Library/UniClassObject.py @@ -0,0 +1,1074 @@ +## @file +# Collect all defined strings in multiple uni files. +# +# Copyright (c) 2014 - 2019, Intel Corporation. All rights reserved.
#
# SPDX-License-Identifier: BSD-2-Clause-Patent
#
"""
Collect all defined strings in multiple uni files
"""
from __future__ import print_function

##
# Import Modules
#
import os
import codecs
import re
# NOTE(review): distutils was removed from the standard library in
# Python 3.12 (PEP 632); this module still relies on
# distutils.util.split_quoted when parsing '#langdef' lines.
import distutils.util
from Logger import ToolError
from Logger import Log as EdkLogger
from Logger import StringTable as ST
from Library.StringUtils import GetLineNo
from Library.Misc import PathClass
from Library.Misc import GetCharIndexOutStr
from Library import DataType as DT
from Library.ParserValidate import CheckUTF16FileHeader

##
# Static definitions
#
# Escape sequences as they are written in the text of a .uni file
UNICODE_WIDE_CHAR = u'\\wide'
UNICODE_NARROW_CHAR = u'\\narrow'
UNICODE_NON_BREAKING_CHAR = u'\\nbr'
UNICODE_UNICODE_CR = '\r'
UNICODE_UNICODE_LF = '\n'

# Characters the escape sequences above are replaced with while parsing
NARROW_CHAR = u'\uFFF0'
WIDE_CHAR = u'\uFFF1'
NON_BREAKING_CHAR = u'\uFFF2'
CR = u'\u000D'
LF = u'\u000A'
NULL = u'\u0000'
TAB = u'\t'
BACK_SPLASH = u'\\'

# Maps three-letter language codes to their two-letter equivalents
gLANG_CONV_TABLE = {'eng':'en', 'fra':'fr', \
    'aar':'aa', 'abk':'ab', 'ave':'ae', 'afr':'af', 'aka':'ak', 'amh':'am', \
    'arg':'an', 'ara':'ar', 'asm':'as', 'ava':'av', 'aym':'ay', 'aze':'az', \
    'bak':'ba', 'bel':'be', 'bul':'bg', 'bih':'bh', 'bis':'bi', 'bam':'bm', \
    'ben':'bn', 'bod':'bo', 'bre':'br', 'bos':'bs', 'cat':'ca', 'che':'ce', \
    'cha':'ch', 'cos':'co', 'cre':'cr', 'ces':'cs', 'chu':'cu', 'chv':'cv', \
    'cym':'cy', 'dan':'da', 'deu':'de', 'div':'dv', 'dzo':'dz', 'ewe':'ee', \
    'ell':'el', 'epo':'eo', 'spa':'es', 'est':'et', 'eus':'eu', 'fas':'fa', \
    'ful':'ff', 'fin':'fi', 'fij':'fj', 'fao':'fo', 'fry':'fy', 'gle':'ga', \
    'gla':'gd', 'glg':'gl', 'grn':'gn', 'guj':'gu', 'glv':'gv', 'hau':'ha', \
    'heb':'he', 'hin':'hi', 'hmo':'ho', 'hrv':'hr', 'hat':'ht', 'hun':'hu', \
    'hye':'hy', 'her':'hz', 'ina':'ia', 'ind':'id', 'ile':'ie', 'ibo':'ig', \
    'iii':'ii', 'ipk':'ik', 'ido':'io', 'isl':'is', 'ita':'it', 'iku':'iu', \
    'jpn':'ja', 'jav':'jv', 'kat':'ka', 'kon':'kg', 'kik':'ki', 'kua':'kj', \
    'kaz':'kk', 'kal':'kl', 'khm':'km', 'kan':'kn', 'kor':'ko', 'kau':'kr', \
    'kas':'ks', 'kur':'ku', 'kom':'kv', 'cor':'kw', 'kir':'ky', 'lat':'la', \
    'ltz':'lb', 'lug':'lg', 'lim':'li', 'lin':'ln', 'lao':'lo', 'lit':'lt', \
    'lub':'lu', 'lav':'lv', 'mlg':'mg', 'mah':'mh', 'mri':'mi', 'mkd':'mk', \
    'mal':'ml', 'mon':'mn', 'mar':'mr', 'msa':'ms', 'mlt':'mt', 'mya':'my', \
    'nau':'na', 'nob':'nb', 'nde':'nd', 'nep':'ne', 'ndo':'ng', 'nld':'nl', \
    'nno':'nn', 'nor':'no', 'nbl':'nr', 'nav':'nv', 'nya':'ny', 'oci':'oc', \
    'oji':'oj', 'orm':'om', 'ori':'or', 'oss':'os', 'pan':'pa', 'pli':'pi', \
    'pol':'pl', 'pus':'ps', 'por':'pt', 'que':'qu', 'roh':'rm', 'run':'rn', \
    'ron':'ro', 'rus':'ru', 'kin':'rw', 'san':'sa', 'srd':'sc', 'snd':'sd', \
    'sme':'se', 'sag':'sg', 'sin':'si', 'slk':'sk', 'slv':'sl', 'smo':'sm', \
    'sna':'sn', 'som':'so', 'sqi':'sq', 'srp':'sr', 'ssw':'ss', 'sot':'st', \
    'sun':'su', 'swe':'sv', 'swa':'sw', 'tam':'ta', 'tel':'te', 'tgk':'tg', \
    'tha':'th', 'tir':'ti', 'tuk':'tk', 'tgl':'tl', 'tsn':'tn', 'ton':'to', \
    'tur':'tr', 'tso':'ts', 'tat':'tt', 'twi':'tw', 'tah':'ty', 'uig':'ug', \
    'ukr':'uk', 'urd':'ur', 'uzb':'uz', 'ven':'ve', 'vie':'vi', 'vol':'vo', \
    'wln':'wa', 'wol':'wo', 'xho':'xh', 'yid':'yi', 'yor':'yo', 'zha':'za', \
    'zho':'zh', 'zul':'zu'}

## Convert a python unicode string to a normal string
#
# Convert a python unicode string to a normal string
# UniToStr(u'I am a string') is 'I am a string'
#
# The result is the body of repr(Uni) with the surrounding quotes (and the
# 'u' prefix Python 2 puts in front of unicode literals) removed.
#
# @param Uni:    The python unicode string
#
# @retval:       The formatted normal string
#
def UniToStr(Uni):
    Text = repr(Uni)
    #
    # Python 2 renders unicode strings as u'...', Python 3 as '...'.
    # Only skip the prefix when it is really there, otherwise the first
    # character of the string itself would be dropped.
    #
    if Text[:1] in ('u', 'U'):
        Text = Text[1:]
    return Text[1:-1]

## Convert a unicode string to a Hex list
#
# Convert a unicode string to a Hex list
# UniToHexList('ABC') is ['0x41', '0x00', '0x42', '0x00', '0x43', '0x00']
#
# Every character is emitted as UTF-16 code units, low byte first. Characters
# above U+FFFF are emitted as a surrogate pair (four bytes).
#
# @param Uni:    The python unicode string
#
# @retval List:  The formatted hex list
#
def UniToHexList(Uni):
    List = []
    for Item in Uni:
        Code = ord(Item)
        if Code > 0xFFFF:
            #
            # Does not fit into one 16-bit unit: split into high/low surrogate
            #
            Code -= 0x10000
            Units = (0xD800 + (Code >> 10), 0xDC00 + (Code & 0x3FF))
        else:
            Units = (Code,)
        for Unit in Units:
            List.append('0x%02X' % (Unit & 0xFF))
            List.append('0x%02X' % (Unit >> 8))
    return List

## Convert
# special unicode characters
#
# Convert special characters to (c), (r) and (tm).
#
# @param Uni:    The python unicode string
#
# @retval NewUni:  The converted unicode string
#
def ConvertSpecialUnicodes(Uni):
    #
    # Copyright sign, registered sign and trade mark sign are replaced with
    # their plain ASCII spellings; all other characters are left untouched.
    #
    for Special, Ascii in ((u'\u00A9', '(c)'), (u'\u00AE', '(r)'), (u'\u2122', '(tm)')):
        Uni = Uni.replace(Special, Ascii)
    return Uni

## GetLanguageCode1766
#
# Return the language code read from the .UNI file unchanged.
#
# The function is a pass-through: it returned its input before doing any
# validation or RFC 4646 -> RFC 1766 conversion, so the conversion code that
# used to follow the return statement could never run and has been removed.
#
# @param LangName:   Language codes read from .UNI file
# @param File:       Not used; kept so existing callers keep working
#
# @retval LangName:  The language code exactly as it was passed in
#
def GetLanguageCode1766(LangName, File=None):
    return LangName

## GetLanguageCode
#
# Check the
# language code read from .UNI file and convert RFC 1766 codes to RFC 4646 codes if appropriate
# RFC 1766 language codes supported in compatibility mode
# RFC 4646 language codes supported in native mode
#
# In compatibility mode only three-letter alphabetic codes are accepted; they
# are mapped through gLANG_CONV_TABLE when an entry exists and returned as-is
# otherwise. In native mode private-use tags ('x-...'/'X-...') and tags whose
# shape passes the checks below are returned unchanged. Everything else is
# reported through EdkLogger.Error as FORMAT_INVALID.
#
# @param LangName:          Language codes read from .UNI file
# @param IsCompatibleMode:  True to treat LangName as an RFC 1766 code
# @param File:              Passed to EdkLogger.Error when reporting a failure
#
# @retval LangName:  Valid language code in RFC 4646 format or None
#
def GetLanguageCode(LangName, IsCompatibleMode, File):
    length = len(LangName)
    if IsCompatibleMode:
        if length == 3 and LangName.isalpha():
            TempLangName = gLANG_CONV_TABLE.get(LangName.lower())
            if TempLangName is not None:
                return TempLangName
            return LangName
        else:
            EdkLogger.Error("Unicode File Parser",
                            ToolError.FORMAT_INVALID,
                            "Invalid RFC 1766 language code : %s" % LangName,
                            File)
    #
    # Private-use tag. Slicing (instead of indexing [0] and [1]) keeps an
    # empty or one-character name from raising IndexError, so such names
    # reach the FORMAT_INVALID report at the end of the function.
    #
    if LangName[:2] in ('X-', 'x-'):
        return LangName
    if length == 2:
        if LangName.isalpha():
            return LangName
    elif length == 3:
        # A three-letter code is only accepted when no two-letter form exists
        if LangName.isalpha() and gLANG_CONV_TABLE.get(LangName.lower()) is None:
            return LangName
    elif length == 5:
        if LangName[0:2].isalpha() and LangName[2] == '-':
            return LangName
    elif length >= 6:
        if LangName[0:2].isalpha() and LangName[2] == '-':
            return LangName
        #
        # NOTE(review): the table is looked up with the whole tag, which is
        # at least six characters long here and so can never equal one of the
        # three-letter keys; the lookup therefore always yields None.
        # Possibly LangName[0:3] was intended - confirm before changing.
        #
        if LangName[0:3].isalpha() and gLANG_CONV_TABLE.get(LangName.lower()) is None and LangName[3] == '-':
            return LangName

    EdkLogger.Error("Unicode File Parser",
                    ToolError.FORMAT_INVALID,
                    "Invalid RFC 4646 language code : %s" % LangName,
                    File)

## FormatUniEntry
#
# Formatted the entry in Uni file.
#
# @param StrTokenName StrTokenName.
# @param TokenValueList A list need to be processed.
# @param ContainerFile ContainerFile.
+# +# @return formatted entry +def FormatUniEntry(StrTokenName, TokenValueList, ContainerFile): + SubContent = '' + PreFormatLength = 40 + if len(StrTokenName) > PreFormatLength: + PreFormatLength = len(StrTokenName) + 1 + for (Lang, Value) in TokenValueList: + if not Value or Lang == DT.TAB_LANGUAGE_EN_X: + continue + if Lang == '': + Lang = DT.TAB_LANGUAGE_EN_US + if Lang == 'eng': + Lang = DT.TAB_LANGUAGE_EN_US + elif len(Lang.split('-')[0]) == 3: + Lang = GetLanguageCode(Lang.split('-')[0], True, ContainerFile) + else: + Lang = GetLanguageCode(Lang, False, ContainerFile) + ValueList = Value.split('\n') + SubValueContent = '' + for SubValue in ValueList: + if SubValue.strip(): + SubValueContent += \ + ' ' * (PreFormatLength + len('#language en-US ')) + '\"%s\\n\"' % SubValue.strip() + '\r\n' + SubValueContent = SubValueContent[(PreFormatLength + len('#language en-US ')):SubValueContent.rfind('\\n')] \ + + '\"' + '\r\n' + SubContent += ' '*PreFormatLength + '#language %-5s ' % Lang + SubValueContent + if SubContent: + SubContent = StrTokenName + ' '*(PreFormatLength - len(StrTokenName)) + SubContent[PreFormatLength:] + return SubContent + + +## StringDefClassObject +# +# A structure for language definition +# +class StringDefClassObject(object): + def __init__(self, Name = None, Value = None, Referenced = False, Token = None, UseOtherLangDef = ''): + self.StringName = '' + self.StringNameByteList = [] + self.StringValue = '' + self.StringValueByteList = '' + self.Token = 0 + self.Referenced = Referenced + self.UseOtherLangDef = UseOtherLangDef + self.Length = 0 + + if Name is not None: + self.StringName = Name + self.StringNameByteList = UniToHexList(Name) + if Value is not None: + self.StringValue = Value + self.StringValueByteList = UniToHexList(self.StringValue) + self.Length = len(self.StringValueByteList) + if Token is not None: + self.Token = Token + + def __str__(self): + return repr(self.StringName) + ' ' + \ + repr(self.Token) + ' ' + \ + 
repr(self.Referenced) + ' ' + \ + repr(self.StringValue) + ' ' + \ + repr(self.UseOtherLangDef) + + def UpdateValue(self, Value = None): + if Value is not None: + if self.StringValue: + self.StringValue = self.StringValue + '\r\n' + Value + else: + self.StringValue = Value + self.StringValueByteList = UniToHexList(self.StringValue) + self.Length = len(self.StringValueByteList) + +## UniFileClassObject +# +# A structure for .uni file definition +# +class UniFileClassObject(object): + def __init__(self, FileList = None, IsCompatibleMode = False, IncludePathList = None): + self.FileList = FileList + self.File = None + self.IncFileList = FileList + self.UniFileHeader = '' + self.Token = 2 + self.LanguageDef = [] #[ [u'LanguageIdentifier', u'PrintableName'], ... ] + self.OrderedStringList = {} #{ u'LanguageIdentifier' : [StringDefClassObject] } + self.OrderedStringDict = {} #{ u'LanguageIdentifier' : {StringName:(IndexInList)} } + self.OrderedStringListByToken = {} #{ u'LanguageIdentifier' : {Token: StringDefClassObject} } + self.IsCompatibleMode = IsCompatibleMode + if not IncludePathList: + self.IncludePathList = [] + else: + self.IncludePathList = IncludePathList + if len(self.FileList) > 0: + self.LoadUniFiles(FileList) + + # + # Get Language definition + # + def GetLangDef(self, File, Line): + Lang = distutils.util.split_quoted((Line.split(u"//")[0])) + if len(Lang) != 3: + try: + FileIn = codecs.open(File.Path, mode='rb', encoding='utf_8').readlines() + except UnicodeError as Xstr: + FileIn = codecs.open(File.Path, mode='rb', encoding='utf_16').readlines() + except UnicodeError as Xstr: + FileIn = codecs.open(File.Path, mode='rb', encoding='utf_16_le').readlines() + except: + EdkLogger.Error("Unicode File Parser", + ToolError.FILE_OPEN_FAILURE, + "File read failure: %s" % str(Xstr), + ExtraData=File) + LineNo = GetLineNo(FileIn, Line, False) + EdkLogger.Error("Unicode File Parser", + ToolError.PARSER_ERROR, + "Wrong language definition", + 
ExtraData="""%s\n\t*Correct format is like '#langdef en-US "English"'""" % Line, + File = File, Line = LineNo) + else: + LangName = GetLanguageCode(Lang[1], self.IsCompatibleMode, self.File) + LangPrintName = Lang[2] + + IsLangInDef = False + for Item in self.LanguageDef: + if Item[0] == LangName: + IsLangInDef = True + break + + if not IsLangInDef: + self.LanguageDef.append([LangName, LangPrintName]) + + # + # Add language string + # + self.AddStringToList(u'$LANGUAGE_NAME', LangName, LangName, 0, True, Index=0) + self.AddStringToList(u'$PRINTABLE_LANGUAGE_NAME', LangName, LangPrintName, 1, True, Index=1) + + if not IsLangInDef: + # + # The found STRING tokens will be added into new language string list + # so that the unique STRING identifier is reserved for all languages in the package list. + # + FirstLangName = self.LanguageDef[0][0] + if LangName != FirstLangName: + for Index in range (2, len (self.OrderedStringList[FirstLangName])): + Item = self.OrderedStringList[FirstLangName][Index] + if Item.UseOtherLangDef != '': + OtherLang = Item.UseOtherLangDef + else: + OtherLang = FirstLangName + self.OrderedStringList[LangName].append (StringDefClassObject(Item.StringName, + '', + Item.Referenced, + Item.Token, + OtherLang)) + self.OrderedStringDict[LangName][Item.StringName] = len(self.OrderedStringList[LangName]) - 1 + return True + + # + # Get String name and value + # + def GetStringObject(self, Item): + Language = '' + Value = '' + + Name = Item.split()[1] + # Check the string name is the upper character + if Name != '': + MatchString = re.match('[A-Z0-9_]+', Name, re.UNICODE) + if MatchString is None or MatchString.end(0) != len(Name): + EdkLogger.Error("Unicode File Parser", + ToolError.FORMAT_INVALID, + 'The string token name %s in UNI file %s must be upper case character.' 
%(Name, self.File)) + LanguageList = Item.split(u'#language ') + for IndexI in range(len(LanguageList)): + if IndexI == 0: + continue + else: + Language = LanguageList[IndexI].split()[0] + #.replace(u'\r\n', u'') + Value = \ + LanguageList[IndexI][LanguageList[IndexI].find(u'\"') + len(u'\"') : LanguageList[IndexI].rfind(u'\"')] + Language = GetLanguageCode(Language, self.IsCompatibleMode, self.File) + self.AddStringToList(Name, Language, Value) + + # + # Get include file list and load them + # + def GetIncludeFile(self, Item, Dir = None): + if Dir: + pass + FileName = Item[Item.find(u'!include ') + len(u'!include ') :Item.find(u' ', len(u'!include '))][1:-1] + self.LoadUniFile(FileName) + + # + # Pre-process before parse .uni file + # + def PreProcess(self, File, IsIncludeFile=False): + if not os.path.exists(File.Path) or not os.path.isfile(File.Path): + EdkLogger.Error("Unicode File Parser", + ToolError.FILE_NOT_FOUND, + ExtraData=File.Path) + + # + # Check file header of the Uni file + # +# if not CheckUTF16FileHeader(File.Path): +# EdkLogger.Error("Unicode File Parser", ToolError.FORMAT_INVALID, +# ExtraData='The file %s is either invalid UTF-16LE or it is missing the BOM.' 
% File.Path) + + try: + FileIn = codecs.open(File.Path, mode='rb', encoding='utf_8').readlines() + except UnicodeError as Xstr: + FileIn = codecs.open(File.Path, mode='rb', encoding='utf_16').readlines() + except UnicodeError: + FileIn = codecs.open(File.Path, mode='rb', encoding='utf_16_le').readlines() + except: + EdkLogger.Error("Unicode File Parser", ToolError.FILE_OPEN_FAILURE, ExtraData=File.Path) + + + # + # get the file header + # + Lines = [] + HeaderStart = False + HeaderEnd = False + if not self.UniFileHeader: + FirstGenHeader = True + else: + FirstGenHeader = False + for Line in FileIn: + Line = Line.strip() + if Line == u'': + continue + if Line.startswith(DT.TAB_COMMENT_EDK1_SPLIT) and (Line.find(DT.TAB_HEADER_COMMENT) > -1) \ + and not HeaderEnd and not HeaderStart: + HeaderStart = True + if not Line.startswith(DT.TAB_COMMENT_EDK1_SPLIT) and HeaderStart and not HeaderEnd: + HeaderEnd = True + if Line.startswith(DT.TAB_COMMENT_EDK1_SPLIT) and HeaderStart and not HeaderEnd and FirstGenHeader: + self.UniFileHeader += Line + '\r\n' + continue + + # + # Use unique identifier + # + FindFlag = -1 + LineCount = 0 + MultiLineFeedExits = False + # + # 0: initial value + # 1: single String entry exist + # 2: line feed exist under the some single String entry + # + StringEntryExistsFlag = 0 + for Line in FileIn: + Line = FileIn[LineCount] + LineCount += 1 + Line = Line.strip() + # + # Ignore comment line and empty line + # + if Line == u'' or Line.startswith(u'//'): + # + # Change the single line String entry flag status + # + if StringEntryExistsFlag == 1: + StringEntryExistsFlag = 2 + # + # If the '#string' line and the '#language' line are not in the same line, + # there should be only one line feed character between them + # + if MultiLineFeedExits: + EdkLogger.Error("Unicode File Parser", ToolError.FORMAT_INVALID, ExtraData=File.Path) + continue + + MultiLineFeedExits = False + # + # Process comment embedded in string define lines + # + FindFlag = 
Line.find(u'//') + if FindFlag != -1 and Line.find(u'//') < Line.find(u'"'): + Line = Line.replace(Line[FindFlag:], u' ') + if FileIn[LineCount].strip().startswith('#language'): + Line = Line + FileIn[LineCount] + FileIn[LineCount-1] = Line + FileIn[LineCount] = '\r\n' + LineCount -= 1 + for Index in range (LineCount + 1, len (FileIn) - 1): + if (Index == len(FileIn) -1): + FileIn[Index] = '\r\n' + else: + FileIn[Index] = FileIn[Index + 1] + continue + CommIndex = GetCharIndexOutStr(u'/', Line) + if CommIndex > -1: + if (len(Line) - 1) > CommIndex: + if Line[CommIndex+1] == u'/': + Line = Line[:CommIndex].strip() + else: + EdkLogger.Error("Unicode File Parser", ToolError.FORMAT_INVALID, ExtraData=File.Path) + else: + EdkLogger.Error("Unicode File Parser", ToolError.FORMAT_INVALID, ExtraData=File.Path) + + Line = Line.replace(UNICODE_WIDE_CHAR, WIDE_CHAR) + Line = Line.replace(UNICODE_NARROW_CHAR, NARROW_CHAR) + Line = Line.replace(UNICODE_NON_BREAKING_CHAR, NON_BREAKING_CHAR) + + Line = Line.replace(u'\\\\', u'\u0006') + Line = Line.replace(u'\\r\\n', CR + LF) + Line = Line.replace(u'\\n', CR + LF) + Line = Line.replace(u'\\r', CR) + Line = Line.replace(u'\\t', u'\t') + Line = Line.replace(u'''\"''', u'''"''') + Line = Line.replace(u'\t', u' ') + Line = Line.replace(u'\u0006', u'\\') + + # + # Check if single line has correct '"' + # + if Line.startswith(u'#string') and Line.find(u'#language') > -1 and Line.find('"') > Line.find(u'#language'): + if not Line.endswith('"'): + EdkLogger.Error("Unicode File Parser", ToolError.FORMAT_INVALID, + ExtraData='''The line %s misses '"' at the end of it in file %s''' + % (LineCount, File.Path)) + + # + # Between Name entry and Language entry can not contain line feed + # + if Line.startswith(u'#string') and Line.find(u'#language') == -1: + MultiLineFeedExits = True + + if Line.startswith(u'#string') and Line.find(u'#language') > 0 and Line.find(u'"') < 0: + MultiLineFeedExits = True + + # + # Between Language entry and String 
entry can not contain line feed + # + if Line.startswith(u'#language') and len(Line.split()) == 2: + MultiLineFeedExits = True + + # + # Check the situation that there only has one '"' for the language entry + # + if Line.startswith(u'#string') and Line.find(u'#language') > 0 and Line.count(u'"') == 1: + EdkLogger.Error("Unicode File Parser", ToolError.FORMAT_INVALID, + ExtraData='''The line %s misses '"' at the end of it in file %s''' + % (LineCount, File.Path)) + + # + # Check the situation that there has more than 2 '"' for the language entry + # + if Line.startswith(u'#string') and Line.find(u'#language') > 0 and Line.replace(u'\\"', '').count(u'"') > 2: + EdkLogger.Error("Unicode File Parser", ToolError.FORMAT_INVALID, + ExtraData='''The line %s has more than 2 '"' for language entry in file %s''' + % (LineCount, File.Path)) + + # + # Between two String entry, can not contain line feed + # + if Line.startswith(u'"'): + if StringEntryExistsFlag == 2: + EdkLogger.Error("Unicode File Parser", ToolError.FORMAT_INVALID, + Message=ST.ERR_UNIPARSE_LINEFEED_UP_EXIST % Line, ExtraData=File.Path) + + StringEntryExistsFlag = 1 + if not Line.endswith('"'): + EdkLogger.Error("Unicode File Parser", ToolError.FORMAT_INVALID, + ExtraData='''The line %s misses '"' at the end of it in file %s''' + % (LineCount, File.Path)) + + # + # Check the situation that there has more than 2 '"' for the language entry + # + if Line.strip() and Line.replace(u'\\"', '').count(u'"') > 2: + EdkLogger.Error("Unicode File Parser", ToolError.FORMAT_INVALID, + ExtraData='''The line %s has more than 2 '"' for language entry in file %s''' + % (LineCount, File.Path)) + + elif Line.startswith(u'#language'): + if StringEntryExistsFlag == 2: + EdkLogger.Error("Unicode File Parser", ToolError.FORMAT_INVALID, + Message=ST.ERR_UNI_MISS_STRING_ENTRY % Line, ExtraData=File.Path) + StringEntryExistsFlag = 0 + else: + StringEntryExistsFlag = 0 + + Lines.append(Line) + + # + # Convert string def format as below 
+ # + # #string MY_STRING_1 + # #language eng + # "My first English string line 1" + # "My first English string line 2" + # #string MY_STRING_1 + # #language spa + # "Mi segunda secuencia 1" + # "Mi segunda secuencia 2" + # + + if not IsIncludeFile and not Lines: + EdkLogger.Error("Unicode File Parser", ToolError.FORMAT_INVALID, \ + Message=ST.ERR_UNIPARSE_NO_SECTION_EXIST, \ + ExtraData=File.Path) + + NewLines = [] + StrName = u'' + ExistStrNameList = [] + for Line in Lines: + if StrName and not StrName.split()[1].startswith(DT.TAB_STR_TOKENCNAME + DT.TAB_UNDERLINE_SPLIT): + EdkLogger.Error("Unicode File Parser", ToolError.FORMAT_INVALID, \ + Message=ST.ERR_UNIPARSE_STRNAME_FORMAT_ERROR % StrName.split()[1], \ + ExtraData=File.Path) + + if StrName and len(StrName.split()[1].split(DT.TAB_UNDERLINE_SPLIT)) == 4: + StringTokenList = StrName.split()[1].split(DT.TAB_UNDERLINE_SPLIT) + if (StringTokenList[3].upper() in [DT.TAB_STR_TOKENPROMPT, DT.TAB_STR_TOKENHELP] and \ + StringTokenList[3] not in [DT.TAB_STR_TOKENPROMPT, DT.TAB_STR_TOKENHELP]) or \ + (StringTokenList[2].upper() == DT.TAB_STR_TOKENERR and StringTokenList[2] != DT.TAB_STR_TOKENERR): + EdkLogger.Error("Unicode File Parser", ToolError.FORMAT_INVALID, \ + Message=ST.ERR_UNIPARSE_STRTOKEN_FORMAT_ERROR % StrName.split()[1], \ + ExtraData=File.Path) + + if Line.count(u'#language') > 1: + EdkLogger.Error("Unicode File Parser", ToolError.FORMAT_INVALID, \ + Message=ST.ERR_UNIPARSE_SEP_LANGENTRY_LINE % Line, \ + ExtraData=File.Path) + + if Line.startswith(u'//'): + continue + elif Line.startswith(u'#langdef'): + if len(Line.split()) == 2: + NewLines.append(Line) + continue + elif len(Line.split()) > 2 and Line.find(u'"') > 0: + NewLines.append(Line[:Line.find(u'"')].strip()) + NewLines.append(Line[Line.find(u'"'):]) + else: + EdkLogger.Error("Unicode File Parser", ToolError.FORMAT_INVALID, ExtraData=File.Path) + elif Line.startswith(u'#string'): + if len(Line.split()) == 2: + StrName = Line + if StrName: + if 
StrName.split()[1] not in ExistStrNameList: + ExistStrNameList.append(StrName.split()[1].strip()) + elif StrName.split()[1] in [DT.TAB_INF_ABSTRACT, DT.TAB_INF_DESCRIPTION, \ + DT.TAB_INF_BINARY_ABSTRACT, DT.TAB_INF_BINARY_DESCRIPTION, \ + DT.TAB_DEC_PACKAGE_ABSTRACT, DT.TAB_DEC_PACKAGE_DESCRIPTION, \ + DT.TAB_DEC_BINARY_ABSTRACT, DT.TAB_DEC_BINARY_DESCRIPTION]: + EdkLogger.Error("Unicode File Parser", ToolError.FORMAT_INVALID, \ + Message=ST.ERR_UNIPARSE_MULTI_ENTRY_EXIST % StrName.split()[1], \ + ExtraData=File.Path) + continue + elif len(Line.split()) == 4 and Line.find(u'#language') > 0: + if Line[Line.find(u'#language')-1] != ' ' or \ + Line[Line.find(u'#language')+len(u'#language')] != u' ': + EdkLogger.Error("Unicode File Parser", ToolError.FORMAT_INVALID, ExtraData=File.Path) + + if Line.find(u'"') > 0: + EdkLogger.Error("Unicode File Parser", ToolError.FORMAT_INVALID, ExtraData=File.Path) + + StrName = Line.split()[0] + u' ' + Line.split()[1] + if StrName: + if StrName.split()[1] not in ExistStrNameList: + ExistStrNameList.append(StrName.split()[1].strip()) + elif StrName.split()[1] in [DT.TAB_INF_ABSTRACT, DT.TAB_INF_DESCRIPTION, \ + DT.TAB_INF_BINARY_ABSTRACT, DT.TAB_INF_BINARY_DESCRIPTION, \ + DT.TAB_DEC_PACKAGE_ABSTRACT, DT.TAB_DEC_PACKAGE_DESCRIPTION, \ + DT.TAB_DEC_BINARY_ABSTRACT, DT.TAB_DEC_BINARY_DESCRIPTION]: + EdkLogger.Error("Unicode File Parser", ToolError.FORMAT_INVALID, \ + Message=ST.ERR_UNIPARSE_MULTI_ENTRY_EXIST % StrName.split()[1], \ + ExtraData=File.Path) + if IsIncludeFile: + if StrName not in NewLines: + NewLines.append((Line[:Line.find(u'#language')]).strip()) + else: + NewLines.append((Line[:Line.find(u'#language')]).strip()) + NewLines.append((Line[Line.find(u'#language'):]).strip()) + elif len(Line.split()) > 4 and Line.find(u'#language') > 0 and Line.find(u'"') > 0: + if Line[Line.find(u'#language')-1] != u' ' or \ + Line[Line.find(u'#language')+len(u'#language')] != u' ': + EdkLogger.Error("Unicode File Parser", 
ToolError.FORMAT_INVALID, ExtraData=File.Path) + + if Line[Line.find(u'"')-1] != u' ': + EdkLogger.Error("Unicode File Parser", ToolError.FORMAT_INVALID, ExtraData=File.Path) + + StrName = Line.split()[0] + u' ' + Line.split()[1] + if StrName: + if StrName.split()[1] not in ExistStrNameList: + ExistStrNameList.append(StrName.split()[1].strip()) + elif StrName.split()[1] in [DT.TAB_INF_ABSTRACT, DT.TAB_INF_DESCRIPTION, \ + DT.TAB_INF_BINARY_ABSTRACT, DT.TAB_INF_BINARY_DESCRIPTION, \ + DT.TAB_DEC_PACKAGE_ABSTRACT, DT.TAB_DEC_PACKAGE_DESCRIPTION, \ + DT.TAB_DEC_BINARY_ABSTRACT, DT.TAB_DEC_BINARY_DESCRIPTION]: + EdkLogger.Error("Unicode File Parser", ToolError.FORMAT_INVALID, \ + Message=ST.ERR_UNIPARSE_MULTI_ENTRY_EXIST % StrName.split()[1], \ + ExtraData=File.Path) + if IsIncludeFile: + if StrName not in NewLines: + NewLines.append((Line[:Line.find(u'#language')]).strip()) + else: + NewLines.append((Line[:Line.find(u'#language')]).strip()) + NewLines.append((Line[Line.find(u'#language'):Line.find(u'"')]).strip()) + NewLines.append((Line[Line.find(u'"'):]).strip()) + else: + EdkLogger.Error("Unicode File Parser", ToolError.FORMAT_INVALID, ExtraData=File.Path) + elif Line.startswith(u'#language'): + if len(Line.split()) == 2: + if IsIncludeFile: + if StrName not in NewLines: + NewLines.append(StrName) + else: + NewLines.append(StrName) + NewLines.append(Line) + elif len(Line.split()) > 2 and Line.find(u'"') > 0: + if IsIncludeFile: + if StrName not in NewLines: + NewLines.append(StrName) + else: + NewLines.append(StrName) + NewLines.append((Line[:Line.find(u'"')]).strip()) + NewLines.append((Line[Line.find(u'"'):]).strip()) + else: + EdkLogger.Error("Unicode File Parser", ToolError.FORMAT_INVALID, ExtraData=File.Path) + elif Line.startswith(u'"'): + if u'#string' in Line or u'#language' in Line: + EdkLogger.Error("Unicode File Parser", ToolError.FORMAT_INVALID, ExtraData=File.Path) + NewLines.append(Line) + else: + print(Line) + EdkLogger.Error("Unicode File Parser", 
ToolError.FORMAT_INVALID, ExtraData=File.Path) + + if StrName and not StrName.split()[1].startswith(u'STR_'): + EdkLogger.Error("Unicode File Parser", ToolError.FORMAT_INVALID, \ + Message=ST.ERR_UNIPARSE_STRNAME_FORMAT_ERROR % StrName.split()[1], \ + ExtraData=File.Path) + + if StrName and not NewLines: + EdkLogger.Error("Unicode File Parser", ToolError.FORMAT_INVALID, \ + Message=ST.ERR_UNI_MISS_LANGENTRY % StrName, \ + ExtraData=File.Path) + + # + # Check Abstract, Description, BinaryAbstract and BinaryDescription order, + # should be Abstract, Description, BinaryAbstract, BinaryDescription + AbstractPosition = -1 + DescriptionPosition = -1 + BinaryAbstractPosition = -1 + BinaryDescriptionPosition = -1 + for StrName in ExistStrNameList: + if DT.TAB_HEADER_ABSTRACT.upper() in StrName: + if 'BINARY' in StrName: + BinaryAbstractPosition = ExistStrNameList.index(StrName) + else: + AbstractPosition = ExistStrNameList.index(StrName) + if DT.TAB_HEADER_DESCRIPTION.upper() in StrName: + if 'BINARY' in StrName: + BinaryDescriptionPosition = ExistStrNameList.index(StrName) + else: + DescriptionPosition = ExistStrNameList.index(StrName) + + OrderList = sorted([AbstractPosition, DescriptionPosition]) + BinaryOrderList = sorted([BinaryAbstractPosition, BinaryDescriptionPosition]) + Min = OrderList[0] + Max = OrderList[1] + BinaryMin = BinaryOrderList[0] + BinaryMax = BinaryOrderList[1] + if BinaryDescriptionPosition > -1: + if not(BinaryDescriptionPosition == BinaryMax and BinaryAbstractPosition == BinaryMin and \ + BinaryMax > Max): + EdkLogger.Error("Unicode File Parser", ToolError.FORMAT_INVALID, \ + Message=ST.ERR_UNIPARSE_ENTRY_ORDER_WRONG, \ + ExtraData=File.Path) + elif BinaryAbstractPosition > -1: + if not(BinaryAbstractPosition > Max): + EdkLogger.Error("Unicode File Parser", ToolError.FORMAT_INVALID, \ + Message=ST.ERR_UNIPARSE_ENTRY_ORDER_WRONG, \ + ExtraData=File.Path) + + if DescriptionPosition > -1: + if not(DescriptionPosition == Max and AbstractPosition == 
#
# Load a .uni file
#
def LoadUniFile(self, File = None):
    """Parse one .uni file and register the definitions found in it.

    File is an object exposing a ``Path`` attribute; it is stored as
    self.File and handed to self.PreProcess, which returns the lines to scan.
    ``#langdef`` lines are forwarded to self.GetLangDef, and every
    ``#string`` / ``#language`` pair followed by quoted value lines is
    forwarded to self.AddStringToList.
    """
    if File is None:
        #
        # File is None here, so File.Path must not be read for the report.
        #
        EdkLogger.Error("Unicode File Parser",
                        ToolError.PARSER_ERROR,
                        Message='No unicode file is given')
        #
        # Only reached if EdkLogger.Error returns instead of raising.
        #
        return

    self.File = File

    #
    # Process special char in file
    #
    Lines = self.PreProcess(File)
    LineCount = len(Lines)

    #
    # Look-ahead lines. They start out empty so that an input with fewer
    # than three lines cannot hit an unbound name; they are refreshed only
    # while a following line exists and otherwise keep their previous content.
    #
    SecondLine = u''
    ThirdLine = u''

    #
    # Get Unicode Information
    #
    for IndexI, Line in enumerate(Lines):
        if IndexI + 1 < LineCount:
            SecondLine = Lines[IndexI + 1]
        if IndexI + 2 < LineCount:
            ThirdLine = Lines[IndexI + 2]

        #
        # Get Language def information
        #
        if Line.find(u'#langdef ') >= 0:
            self.GetLangDef(File, Line + u' ' + SecondLine)
            continue

        #
        # Get string def information format as below
        #
        #     #string MY_STRING_1
        #     #language eng
        #     "My first English string line 1"
        #     "My first English string line 2"
        #     #string MY_STRING_1
        #     #language spa
        #     "Mi segunda secuencia 1"
        #     "Mi segunda secuencia 2"
        #
        IsStringEntry = (
            Line.find(u'#string ') >= 0 and Line.find(u'#language ') < 0 and
            SecondLine.find(u'#string ') < 0 and SecondLine.find(u'#language ') >= 0 and
            ThirdLine.find(u'#string ') < 0 and ThirdLine.find(u'#language ') < 0
        )
        if not IsStringEntry:
            continue

        if Line.find('"') > 0 or SecondLine.find('"') > 0:
            EdkLogger.Error("Unicode File Parser", ToolError.FORMAT_INVALID,
                            Message=ST.ERR_UNIPARSE_DBLQUOTE_UNMATCHED,
                            ExtraData=File.Path)

        Name = Line[Line.find(u'#string ') + len(u'#string ') : ].strip(' ')
        Language = SecondLine[SecondLine.find(u'#language ') + len(u'#language ') : ].strip(' ')

        #
        # Collect the quoted value lines that follow the #language line.
        #
        ValueParts = []
        for IndexJ in range(IndexI + 2, LineCount):
            RawLine = Lines[IndexJ]
            Stripped = RawLine.strip()
            IsValueLine = (
                RawLine.find(u'#string ') < 0 and RawLine.find(u'#language ') < 0 and
                Stripped.startswith(u'"') and Stripped.endswith(u'"')
            )
            if not IsValueLine:
                #
                # Rebinding the outer index here would not change what a
                # for loop visits next, so the scan simply stops.
                #
                break

            Content = Stripped[1:-1]
            #
            # A blank just before the last character of the raw line means
            # this piece is glued to the next one with a space instead of a
            # line break. The slice (rather than [-2]) keeps a one-character
            # line from raising IndexError.
            #
            if RawLine[-2:-1] == ' ':
                if Content.strip():
                    ValueParts.append(Content.rstrip() + ' ')
                else:
                    ValueParts.append(Content)
            else:
                ValueParts.append(Content + '\r\n')

        Value = ''.join(ValueParts)
        if Value.endswith('\r\n'):
            Value = Value[:-len('\r\n')]
        Language = GetLanguageCode(Language, self.IsCompatibleMode, self.File)
        self.AddStringToList(Name, Language, Value)

#
# Load multiple .uni files
#
def LoadUniFiles(self, FileList):
    """Call self.LoadUniFile for every entry of FileList whose ``Path``
    (surrounding whitespace ignored) ends in '.uni', '.UNI' or '.Uni'."""
    for File in FileList:
        if File.Path.strip().endswith(('.uni', '.UNI', '.Uni')):
            self.LoadUniFile(File)

#
# Add a string to list
#
def AddStringToList(self, Name, Language, Value, Token = 0, Referenced = False, UseOtherLangDef = '', Index = -1):
    """Register string Name for Language, or refresh it if already known.

    Existing Name: when Value is not None the stored item gets the new value
    and its UseOtherLangDef is cleared; nothing is added.

    New Name: a StringDefClassObject is created whose token is the current
    length of the Language list (the Token argument is overwritten).  With
    Index == -1 the item is appended and an empty-valued placeholder is
    appended to every other language in self.LanguageDef; with any other
    Index the item is inserted at that position for Language only.
    """
    if Language not in self.OrderedStringList:
        self.OrderedStringList[Language] = []
        self.OrderedStringDict[Language] = {}

    LangList = self.OrderedStringList[Language]
    LangDict = self.OrderedStringDict[Language]

    if Name in LangDict:
        if Value is not None:
            Item = LangList[LangDict[Name]]
            Item.UpdateValue(Value)
            Item.UseOtherLangDef = ''
        return

    Token = len(LangList)
    if Index != -1:
        #
        # NOTE(review): inserting shifts the list positions of later items,
        # but only this Name's dictionary entry is written - confirm that
        # callers using Index do not rely on the other entries.
        #
        LangList.insert(Index, StringDefClassObject(Name,
                                                    Value,
                                                    Referenced,
                                                    Token,
                                                    UseOtherLangDef))
        LangDict[Name] = Index
        return

    LangList.append(StringDefClassObject(Name,
                                         Value,
                                         Referenced,
                                         Token,
                                         UseOtherLangDef))
    LangDict[Name] = Token

    #
    # New STRING token will be added into all language string lists.
    # so that the unique STRING identifier is reserved for all languages in the package list.
    #
    if UseOtherLangDef != '':
        OtherLangDef = UseOtherLangDef
    else:
        OtherLangDef = Language
    for LangName in self.LanguageDef:
        if LangName[0] == Language:
            continue
        OtherList = self.OrderedStringList[LangName[0]]
        OtherList.append(StringDefClassObject(Name,
                                              '',
                                              Referenced,
                                              Token,
                                              OtherLangDef))
        self.OrderedStringDict[LangName[0]][Name] = len(OtherList) - 1

#
# Set the string as referenced
#
def SetStringReferenced(self, Name):
    """Mark string Name as referenced in the first defined language.

    Does nothing when no language is defined or Name is unknown there.
    """
    if len(self.LanguageDef) == 0:
        return

    #
    # String tokens are added in the same order in all language string lists.
    # So, only update the status of the string token in the first language string list.
    #
    Lang = self.LanguageDef[0][0]
    if Name in self.OrderedStringDict[Lang]:
        ItemIndexInList = self.OrderedStringDict[Lang][Name]
        self.OrderedStringList[Lang][ItemIndexInList].Referenced = True

#
# Search the string in language definition by Name
#
def FindStringValue(self, Name, Lang):
    """Return the item stored for Name in language Lang, or None if Name
    is not defined for that language."""
    LangDict = self.OrderedStringDict[Lang]
    if Name not in LangDict:
        return None
    return self.OrderedStringList[Lang][LangDict[Name]]

#
# Search the string in language definition by Token
#
def FindByToken(self, Token, Lang):
    """Return the first item of language Lang whose Token equals Token,
    or None if there is none."""
    for Item in self.OrderedStringList[Lang]:
        if Item.Token == Token:
            return Item
    return None

#
# Re-order strings and re-generate tokens
#
def ReToken(self):
    """Renumber the tokens of all languages and rebuild the by-token index.

    The Referenced flag of the first language's items decides the order:
    referenced strings get the small tokens (counting from 0, and the flag is
    copied to every language), unreferenced strings continue the numbering
    after them.  self.OrderedStringListByToken[lang][token] is filled for
    every language in self.LanguageDef.  Returns None; does nothing when no
    language is defined.
    """
    if len(self.LanguageDef) == 0:
        return None

    #
    # Retoken all language strings according to the status of the string token in the first language.
    #
    FirstLangName = self.LanguageDef[0][0]
    FirstLangItems = self.OrderedStringList[FirstLangName]

    #
    # Rebuild OrderedStringListByToken to facilitate future search by token
    #
    for LangNameItem in self.LanguageDef:
        self.OrderedStringListByToken[LangNameItem[0]] = {}

    #
    # First pass: small tokens for all referenced strings.
    # Second pass: the following tokens for all unreferenced strings.
    #
    NextToken = 0
    for WantReferenced in (True, False):
        for Index, FirstLangItem in enumerate(FirstLangItems):
            if FirstLangItem.Referenced != WantReferenced:
                continue
            for LangNameItem in self.LanguageDef:
                LangName = LangNameItem[0]
                OtherLangItem = self.OrderedStringList[LangName][Index]
                if WantReferenced:
                    OtherLangItem.Referenced = True
                OtherLangItem.Token = NextToken
                self.OrderedStringListByToken[LangName][NextToken] = OtherLangItem
            NextToken = NextToken + 1

    return None

#
# Show the instance itself
#
def ShowMe(self):
    """Print self.LanguageDef, then each language key of
    self.OrderedStringList followed by its members."""
    print(self.LanguageDef)
    for Item in self.OrderedStringList:
        print(Item)
        for Member in self.OrderedStringList[Item]:
            print(str(Member))

#
# Read content from '!include' UNI file
#
def ReadIncludeUNIfile(self, FilaPath):
    """Return the lines of the file at FilaPath as a list of text lines.

    The file is decoded as utf_8, then utf_16, then utf_16_le; the first
    encoding that decodes the whole file wins.  A missing path is reported
    as FILE_NOT_FOUND, an unreadable or undecodable file as
    FILE_OPEN_FAILURE, both through EdkLogger.Error.
    """
    if not os.path.isfile(FilaPath):
        EdkLogger.Error("Unicode File Parser",
                        ToolError.FILE_NOT_FOUND,
                        ExtraData=FilaPath)

    for Encoding in ('utf_8', 'utf_16', 'utf_16_le'):
        try:
            #
            # The with block closes the handle whether or not decoding works.
            #
            with codecs.open(FilaPath, mode='rb', encoding=Encoding) as FileObj:
                return FileObj.readlines()
        except UnicodeError:
            #
            # Wrong guess, try the next candidate encoding.
            #
            continue
        except (IOError, OSError):
            #
            # The file cannot be read at all; another encoding will not help.
            #
            break

    EdkLogger.Error("Unicode File Parser", ToolError.FILE_OPEN_FAILURE, ExtraData=FilaPath)
    #
    # Only reached if EdkLogger.Error returns instead of raising.
    #
    return []