avwx.parsing.sanitization.taf
TAF sanitization support.
"""TAF sanitization support."""

# module
from avwx.parsing.sanitization.base import sanitize_list_with, sanitize_string_with
from avwx.parsing.sanitization.cleaners.base import CleanerListType
from avwx.parsing.sanitization.cleaners.cleaners import OnlySlashes, TrimWxCode
from avwx.parsing.sanitization.cleaners.joined import (
    JoinedCloud,
    JoinedMinMaxTemperature,
    JoinedTafNewLine,
    JoinedTimestamp,
    JoinedWind,
)
from avwx.parsing.sanitization.cleaners.remove import RemoveFromTaf, RemoveTafAmend
from avwx.parsing.sanitization.cleaners.replace import CURRENT, ReplaceItem
from avwx.parsing.sanitization.cleaners.separated import (
    SeparatedAltimeterLetter,
    SeparatedCloudAltitude,
    SeparatedCloudQualifier,
    SeparatedDistance,
    SeparatedFirstTemperature,
    SeparatedMinMaxTemperaturePrefix,
    SeparatedSecondTemperature,
    SeparatedTafTimePrefix,
    SeparatedTemperatureTrailingDigit,
    SeparatedWindUnit,
)
from avwx.parsing.sanitization.cleaners.visibility import VisibilityGreaterThan
from avwx.parsing.sanitization.cleaners.wind import (
    DoubleGust,
    EmptyWind,
    MisplaceWindKT,
    NonGGust,
    RemoveVrbLeadingDigits,
    WindLeadingMistype,
)

# Substring replacements applied to the raw TAF string. The shared CURRENT
# table is unpacked first, so a TAF-specific key listed below takes
# precedence over a CURRENT entry with the same key.
TAF_REPL = {
    **CURRENT,
    "Z/ ": "Z ",
    " PROBB": " PROB",
    " PROBN": " PROB",
    # Keep the leading space like the sibling entries; without it the
    # corrected PROB30 group is glued onto the preceding report element.
    " PROB3P": " PROB30",
    " TMM": " TNM",
    " TMN": " TNM",
    " TXN": " TXM",
    " TNTN": " TN",
    " TXTX": " TX",
    " TXX": " TX",
}


# String-level sanitizer configured with the TAF replacement table.
clean_taf_string = sanitize_string_with(TAF_REPL)


# Cleaner classes handed to sanitize_list_with. The order is significant
# and must be preserved: cleaners are tried in list order for each element.
CLEANERS: CleanerListType = [
    OnlySlashes,
    EmptyWind,
    TrimWxCode,
    SeparatedDistance,
    SeparatedFirstTemperature,
    SeparatedCloudAltitude,
    SeparatedSecondTemperature,
    SeparatedAltimeterLetter,
    SeparatedTemperatureTrailingDigit,
    SeparatedWindUnit,
    SeparatedCloudQualifier,
    SeparatedTafTimePrefix,
    SeparatedMinMaxTemperaturePrefix,
    RemoveFromTaf,
    ReplaceItem,
    RemoveTafAmend,
    VisibilityGreaterThan,
    MisplaceWindKT,
    DoubleGust,
    WindLeadingMistype,
    NonGGust,
    RemoveVrbLeadingDigits,
    JoinedCloud,
    JoinedTimestamp,
    JoinedWind,
    JoinedTafNewLine,
    JoinedMinMaxTemperature,
    # NOTE: other wind fixes are applied by the list sanitizer itself
    # after these cleaners have run.
]

# List-level sanitizer configured with the TAF cleaner sequence.
clean_taf_list = sanitize_list_with(CLEANERS)
TAF_REPL =
{'!': '1', '@': '2', '#': '3', '%': '5', '^': '6', '&': '7', '*': '8', '?': ' ', '"': '', "'": '', '`': '', '.': '', '(': ' ', ')': ' ', ';': ' ', 'MISSINGKT': '', ' 0I0': ' 090', 'NOSIGKT ': 'KT NOSIG ', 'KNOSIGT ': 'KT NOSIG ', '/VRB': ' VRB', 'CALMKT ': 'CALM ', 'CLMKT ': 'CALM ', 'CLRKT ': 'CALM ', ' <1/': ' M1/', '/04SM': '/4SM', '/4SSM': '/4SM', '/08SM': '/8SM', ' /34SM': '3/4SM', ' 3/SM': ' 3/4SM', 'PQ6SM ': 'P6SM ', 'P6000F ': 'P6000FT ', 'P6000FTQ ': 'P6000FT ', ' C A V O K ': ' CAVOK ', 'N0SIG': 'NOSIG', 'SCATTERED': 'SCT', 'BROKEN': 'BKN', 'OVERCAST': 'OVC', 'Z/ ': 'Z ', ' PROBB': ' PROB', ' PROBN': ' PROB', ' PROB3P': 'PROB30', ' TMM': ' TNM', ' TMN': ' TNM', ' TXN': ' TXM', ' TNTN': ' TN', ' TXTX': ' TX', ' TXX': ' TX'}
def sanitize_report_string(text: str, sans: Sanitization) -> str:
    """Clean a raw report while it is still a single string.

    The text is trimmed, upper-cased and stripped of trailing ``=`` before
    whitespace runs are collapsed. The first four characters (the station
    ID) are set aside untouched; every key of ``replacements`` found in the
    remainder is substituted and logged on ``sans``. Cloud layers are then
    separated, and ``sans.extra_spaces_needed`` is raised if that step
    changed the text.

    Inputs shorter than four characters are returned right after the
    initial trim / upper-case / ``=`` strip.
    """
    report = text.strip().upper().rstrip("=")
    if len(report) < 4:
        return report

    # Collapse every run of whitespace into a single space
    report = " ".join(report.split())

    # The station ID must never be altered by the replacement table
    station = report[:4]
    body = report[4:]

    # Swap out known-bad substrings, recording each one that was present
    for bad, good in replacements.items():
        if bad not in body:
            continue
        body = body.replace(bad, good)
        sans.log(bad, good)

    spaced = separate_cloud_layers(body)
    if spaced != body:
        sans.extra_spaces_needed = True
    return station + spaced
Provide sanitization for operations that work better when the report is a string.
CLEANERS: list[type[avwx.parsing.sanitization.cleaners.base.CleanItem] | type[avwx.parsing.sanitization.cleaners.base.CleanPair] | type[avwx.parsing.sanitization.cleaners.base.RemoveItem] | type[avwx.parsing.sanitization.cleaners.base.SplitItem] | type[avwx.parsing.sanitization.cleaners.base.CombineItems]] =
[<class 'avwx.parsing.sanitization.cleaners.cleaners.OnlySlashes'>, <class 'avwx.parsing.sanitization.cleaners.wind.EmptyWind'>, <class 'avwx.parsing.sanitization.cleaners.cleaners.TrimWxCode'>, <class 'avwx.parsing.sanitization.cleaners.separated.SeparatedDistance'>, <class 'avwx.parsing.sanitization.cleaners.separated.SeparatedFirstTemperature'>, <class 'avwx.parsing.sanitization.cleaners.separated.SeparatedCloudAltitude'>, <class 'avwx.parsing.sanitization.cleaners.separated.SeparatedSecondTemperature'>, <class 'avwx.parsing.sanitization.cleaners.separated.SeparatedAltimeterLetter'>, <class 'avwx.parsing.sanitization.cleaners.separated.SeparatedTemperatureTrailingDigit'>, <class 'avwx.parsing.sanitization.cleaners.separated.SeparatedWindUnit'>, <class 'avwx.parsing.sanitization.cleaners.separated.SeparatedCloudQualifier'>, <class 'avwx.parsing.sanitization.cleaners.separated.SeparatedTafTimePrefix'>, <class 'avwx.parsing.sanitization.cleaners.separated.SeparatedMinMaxTemperaturePrefix'>, <class 'avwx.parsing.sanitization.cleaners.remove.remove_items_in.<locals>.RemoveInList'>, <class 'avwx.parsing.sanitization.cleaners.replace.ReplaceItem'>, <class 'avwx.parsing.sanitization.cleaners.remove.RemoveTafAmend'>, <class 'avwx.parsing.sanitization.cleaners.visibility.VisibilityGreaterThan'>, <class 'avwx.parsing.sanitization.cleaners.wind.MisplaceWindKT'>, <class 'avwx.parsing.sanitization.cleaners.wind.DoubleGust'>, <class 'avwx.parsing.sanitization.cleaners.wind.WindLeadingMistype'>, <class 'avwx.parsing.sanitization.cleaners.wind.NonGGust'>, <class 'avwx.parsing.sanitization.cleaners.wind.RemoveVrbLeadingDigits'>, <class 'avwx.parsing.sanitization.cleaners.joined.JoinedCloud'>, <class 'avwx.parsing.sanitization.cleaners.joined.JoinedTimestamp'>, <class 'avwx.parsing.sanitization.cleaners.joined.JoinedWind'>, <class 'avwx.parsing.sanitization.cleaners.joined.JoinedTafNewLine'>, <class 'avwx.parsing.sanitization.cleaners.joined.JoinedMinMaxTemperature'>]
def sanitize_report_list(wxdata: list[str], sans: Sanitization) -> list[str]:
    """Provide sanitization for operations that work better when the report is a list.

    Runs three passes over the report elements:

    1. Each element, visited from last to first, is offered to every cleaner
       in ``_cleaners``; depending on the cleaner type, elements are merged,
       split, rewritten in pairs, removed, or rewritten individually.
    2. Every element except the first is checked for wind-specific fixes.
    3. Leading/trailing ``.``, ``/`` and ``\\`` characters are stripped and
       neighbouring duplicates are dropped.

    Args:
        wxdata: Report elements. This list is modified in place by the first
            two passes.
        sans: Receives a log entry for each change and has its
            ``extra_spaces_found``, ``extra_spaces_needed`` and
            ``duplicates_found`` flags set when the matching fix occurs.

    Returns:
        The stripped elements after being passed through ``dedupe``.
    """
    # NOTE(review): statement nesting below was reconstructed from a flattened
    # listing; confirm the placement of each ``break`` against the real file.
    # Walk a snapshot of (index, element) pairs backwards so that pops and
    # inserts at or after index i do not shift the indices still to be visited.
    for i, item in reversed(list(enumerate(wxdata))):
        for cleaner in _cleaners:
            # TODO: Py3.10 change to match/case on type
            if isinstance(cleaner, CombineItems):
                # Merge this element into the previous one (never for index 0)
                if i and cleaner.can_handle(wxdata[i - 1], item):
                    wxdata[i - 1] += wxdata.pop(i)
                    sans.extra_spaces_found = True
                    if cleaner.should_break:
                        break
            elif isinstance(cleaner, SplitItem):
                # A falsy split index (None or 0) means "do not split"
                if index := cleaner.split_at(item):
                    wxdata.insert(i + 1, item[index:])
                    wxdata[i] = item[:index]
                    sans.extra_spaces_needed = True
                    if cleaner.should_break:
                        break
            elif isinstance(cleaner, CleanPair):
                # Pair cleaners see the previous element and this one together
                if i and cleaner.can_handle(wxdata[i - 1], item):
                    clean_first, clean_second = cleaner.clean(wxdata[i - 1], item)
                    # Only log and store the halves that actually changed
                    if wxdata[i - 1] != clean_first:
                        sans.log(wxdata[i - 1], clean_first)
                        wxdata[i - 1] = clean_first
                    if item != clean_second:
                        sans.log(item, clean_second)
                        wxdata[i] = clean_second
                        break
            elif cleaner.can_handle(item):
                if isinstance(cleaner, RemoveItem):
                    # Drop the element and log what was removed
                    sans.log(wxdata.pop(i))
                elif isinstance(cleaner, CleanItem):
                    cleaned = cleaner.clean(item)
                    wxdata[i] = cleaned
                    sans.log(item, cleaned)
                if cleaner.should_break:
                    break

    # TODO: Replace with above syntax after testing?
    # May wish to keep since some elements could be checked after space needed...but so could the others?

    # Check for wind sanitization
    for i, item in enumerate(wxdata):
        # Skip Station
        if i == 0:
            continue
        if is_variable_wind_direction(item):
            # Keep only the first seven characters of the element
            replaced = item[:7]
            wxdata[i] = replaced
            sans.log(item, replaced)
            continue
        possible_wind = sanitize_wind(item)
        # Accept the sanitized text only if it is recognised as a wind element
        if is_wind(possible_wind):
            if item != possible_wind:
                sans.log(item, possible_wind)
            wxdata[i] = possible_wind

    # Strip extra characters before dedupe
    stripped = [i.strip("./\\") for i in wxdata]
    if wxdata != stripped:
        sans.log_list(wxdata, stripped)
    # Only adjacent repeats are collapsed (only_neighbors=True)
    deduped = dedupe(stripped, only_neighbors=True)
    # stripped has the same length as wxdata, so a shorter result means
    # dedupe removed at least one element
    if len(deduped) != len(wxdata):
        sans.duplicates_found = True
    return deduped
Provide sanitization for operations that work better when the report is a list.