avwx.parsing.sanitization.metar
METAR sanitization support.
"""METAR sanitization support."""

from avwx.parsing.sanitization.base import sanitize_list_with, sanitize_string_with
from avwx.parsing.sanitization.cleaners.base import CleanerListType
from avwx.parsing.sanitization.cleaners.cleaners import OnlySlashes, TrimWxCode
from avwx.parsing.sanitization.cleaners.joined import (
    JoinedCloud,
    JoinedRunwayVisibility,
    JoinedTimestamp,
    JoinedWind,
)
from avwx.parsing.sanitization.cleaners.remove import RemoveFromMetar
from avwx.parsing.sanitization.cleaners.replace import CURRENT, ReplaceItem
from avwx.parsing.sanitization.cleaners.separated import (
    SeparatedAltimeterLetter,
    SeparatedCloudAltitude,
    SeparatedCloudQualifier,
    SeparatedDistance,
    SeparatedFirstTemperature,
    SeparatedSecondTemperature,
    SeparatedTemperatureTrailingDigit,
    SeparatedWindUnit,
)
from avwx.parsing.sanitization.cleaners.visibility import RunwayVisibilityUnit, VisibilityGreaterThan
from avwx.parsing.sanitization.cleaners.wind import (
    DoubleGust,
    EmptyWind,
    MisplaceWindKT,
    NonGGust,
    RemoveVrbLeadingDigits,
    WindLeadingMistype,
)

# String replacements for METAR reports: a copy of the shared CURRENT pairs
# plus one METAR-only entry that drops a stray slash after the "Z".
METAR_REPL = dict(CURRENT)
METAR_REPL["Z/ "] = "Z "


# String-level sanitizer built from the METAR replacement table.
clean_metar_string = sanitize_string_with(METAR_REPL)


# Cleaner classes handed to sanitize_list_with, in exactly this order.
CLEANERS: CleanerListType = [
    OnlySlashes,
    EmptyWind,
    TrimWxCode,
    SeparatedDistance,
    SeparatedFirstTemperature,
    SeparatedCloudAltitude,
    SeparatedSecondTemperature,
    SeparatedAltimeterLetter,
    SeparatedTemperatureTrailingDigit,
    SeparatedWindUnit,
    SeparatedCloudQualifier,
    RemoveFromMetar,
    ReplaceItem,
    VisibilityGreaterThan,
    MisplaceWindKT,
    RunwayVisibilityUnit,
    DoubleGust,
    WindLeadingMistype,
    NonGGust,
    RemoveVrbLeadingDigits,
    JoinedCloud,
    JoinedTimestamp,
    JoinedWind,
    JoinedRunwayVisibility,
]

# List-level sanitizer built from the METAR cleaner classes.
clean_metar_list = sanitize_list_with(CLEANERS)
METAR_REPL =
{'!': '1', '@': '2', '#': '3', '%': '5', '^': '6', '&': '7', '*': '8', '?': ' ', '"': '', "'": '', '`': '', '.': '', '(': ' ', ')': ' ', ';': ' ', 'MISSINGKT': '', ' 0I0': ' 090', 'NOSIGKT ': 'KT NOSIG ', 'KNOSIGT ': 'KT NOSIG ', '/VRB': ' VRB', 'CALMKT ': 'CALM ', 'CLMKT ': 'CALM ', 'CLRKT ': 'CALM ', ' <1/': ' M1/', '/04SM': '/4SM', '/4SSM': '/4SM', '/08SM': '/8SM', ' /34SM': '3/4SM', ' 3/SM': ' 3/4SM', 'PQ6SM ': 'P6SM ', 'P6000F ': 'P6000FT ', 'P6000FTQ ': 'P6000FT ', ' C A V O K ': ' CAVOK ', 'N0SIG': 'NOSIG', 'SCATTERED': 'SCT', 'BROKEN': 'BKN', 'OVERCAST': 'OVC', 'Z/ ': 'Z '}
def sanitize_report_string(text: str, sans: Sanitization) -> str:
    """Provide sanitization for operations that work better when the report is a string.

    The report is trimmed, upper-cased, and has trailing ``=`` characters
    removed. Reports shorter than four characters are returned at that point.
    Otherwise whitespace runs are collapsed to single spaces, the first four
    characters are set aside untouched, every matching key in ``replacements``
    is substituted in the remainder (each substitution is logged on ``sans``),
    and the remainder is passed through ``separate_cloud_layers``.

    Args:
        text: Raw report string.
        sans: Sanitization record; receives one ``log`` call per applied
            replacement and has ``extra_spaces_needed`` set when
            ``separate_cloud_layers`` changed the text.

    Returns:
        The sanitized report string.
    """
    report = text.strip().upper().rstrip("=")
    if len(report) < 4:
        return report
    # Collapse every run of whitespace into a single space
    report = " ".join(report.split())
    # Hold the station ID aside so that no replacement can touch it
    station, body = report[:4], report[4:]
    # Swap out each known-bad substring that is present
    for bad, good in replacements.items():
        if bad not in body:
            continue
        body = body.replace(bad, good)
        sans.log(bad, good)
    layered = separate_cloud_layers(body)
    if layered != body:
        sans.extra_spaces_needed = True
    return station + layered
Provide sanitization for operations that work better when the report is a string.
CLEANERS: list[type[avwx.parsing.sanitization.cleaners.base.CleanItem] | type[avwx.parsing.sanitization.cleaners.base.CleanPair] | type[avwx.parsing.sanitization.cleaners.base.RemoveItem] | type[avwx.parsing.sanitization.cleaners.base.SplitItem] | type[avwx.parsing.sanitization.cleaners.base.CombineItems]] =
[<class 'avwx.parsing.sanitization.cleaners.cleaners.OnlySlashes'>, <class 'avwx.parsing.sanitization.cleaners.wind.EmptyWind'>, <class 'avwx.parsing.sanitization.cleaners.cleaners.TrimWxCode'>, <class 'avwx.parsing.sanitization.cleaners.separated.SeparatedDistance'>, <class 'avwx.parsing.sanitization.cleaners.separated.SeparatedFirstTemperature'>, <class 'avwx.parsing.sanitization.cleaners.separated.SeparatedCloudAltitude'>, <class 'avwx.parsing.sanitization.cleaners.separated.SeparatedSecondTemperature'>, <class 'avwx.parsing.sanitization.cleaners.separated.SeparatedAltimeterLetter'>, <class 'avwx.parsing.sanitization.cleaners.separated.SeparatedTemperatureTrailingDigit'>, <class 'avwx.parsing.sanitization.cleaners.separated.SeparatedWindUnit'>, <class 'avwx.parsing.sanitization.cleaners.separated.SeparatedCloudQualifier'>, <class 'avwx.parsing.sanitization.cleaners.remove.remove_items_in.<locals>.RemoveInList'>, <class 'avwx.parsing.sanitization.cleaners.replace.ReplaceItem'>, <class 'avwx.parsing.sanitization.cleaners.visibility.VisibilityGreaterThan'>, <class 'avwx.parsing.sanitization.cleaners.wind.MisplaceWindKT'>, <class 'avwx.parsing.sanitization.cleaners.visibility.RunwayVisibilityUnit'>, <class 'avwx.parsing.sanitization.cleaners.wind.DoubleGust'>, <class 'avwx.parsing.sanitization.cleaners.wind.WindLeadingMistype'>, <class 'avwx.parsing.sanitization.cleaners.wind.NonGGust'>, <class 'avwx.parsing.sanitization.cleaners.wind.RemoveVrbLeadingDigits'>, <class 'avwx.parsing.sanitization.cleaners.joined.JoinedCloud'>, <class 'avwx.parsing.sanitization.cleaners.joined.JoinedTimestamp'>, <class 'avwx.parsing.sanitization.cleaners.joined.JoinedWind'>, <class 'avwx.parsing.sanitization.cleaners.joined.JoinedRunwayVisibility'>]
def sanitize_report_list(wxdata: list[str], sans: Sanitization) -> list[str]:
    """Provide sanitization for operations that work better when the report is a list.

    Three passes run in order: every cleaner in ``_cleaners`` is tried against
    each element, wind-specific fixes are applied, and finally stray
    punctuation is stripped and ``dedupe`` is applied.

    Args:
        wxdata: Report elements. The list is edited in place during the first
            two passes.
        sans: Sanitization record that receives a log entry for each change
            and the ``extra_spaces_found``, ``extra_spaces_needed`` and
            ``duplicates_found`` flags.

    Returns:
        The result of ``dedupe`` applied to the stripped elements.
    """
    # Walk a snapshot of (index, item) pairs from last to first. pop(i) and
    # insert(i + 1) only shift elements at or after i, so the indexes that are
    # still to be visited keep pointing at the same positions.
    # NOTE(review): `item` is the snapshot value. If a cleaner edits wxdata[i]
    # or wxdata[i - 1] without breaking, later cleaners (and the later visit of
    # i - 1) still test the pre-edit string — confirm this is intended.
    for i, item in reversed(list(enumerate(wxdata))):
        # `_cleaners` is not defined in this listing; the isinstance checks
        # imply it holds cleaner instances — TODO confirm against the factory.
        for cleaner in _cleaners:
            # TODO: Py3.10 change to match/case on type
            if isinstance(cleaner, CombineItems):
                # `i and ...` skips the first element, which has no left neighbor
                if i and cleaner.can_handle(wxdata[i - 1], item):
                    # Merge this element into the previous one
                    wxdata[i - 1] += wxdata.pop(i)
                    sans.extra_spaces_found = True
                    if cleaner.should_break:
                        break
            elif isinstance(cleaner, SplitItem):
                # A falsy index (None or 0) means there is nothing to split
                if index := cleaner.split_at(item):
                    wxdata.insert(i + 1, item[index:])
                    wxdata[i] = item[:index]
                    sans.extra_spaces_needed = True
                    if cleaner.should_break:
                        break
            elif isinstance(cleaner, CleanPair):
                if i and cleaner.can_handle(wxdata[i - 1], item):
                    clean_first, clean_second = cleaner.clean(wxdata[i - 1], item)
                    # Log and store each half only when it actually changed
                    if wxdata[i - 1] != clean_first:
                        sans.log(wxdata[i - 1], clean_first)
                        wxdata[i - 1] = clean_first
                    if item != clean_second:
                        sans.log(item, clean_second)
                        wxdata[i] = clean_second
                        # NOTE(review): nesting was reconstructed from a
                        # flattened listing — confirm this break belongs
                        # inside the "second item changed" branch.
                        break
            elif cleaner.can_handle(item):
                if isinstance(cleaner, RemoveItem):
                    # Drop the element and log the removed value
                    # NOTE(review): after this pop, index i no longer refers
                    # to `item`; presumably RemoveItem cleaners set
                    # should_break — verify.
                    sans.log(wxdata.pop(i))
                elif isinstance(cleaner, CleanItem):
                    cleaned = cleaner.clean(item)
                    wxdata[i] = cleaned
                    sans.log(item, cleaned)
                if cleaner.should_break:
                    break

    # TODO: Replace with above syntax after testing?
    # May wish to keep since some elements could be checked after space needed...but so could the others?

    # Check for wind sanitization
    for i, item in enumerate(wxdata):
        # Skip Station
        if i == 0:
            continue
        if is_variable_wind_direction(item):
            # Keep only the first seven characters of the element
            replaced = item[:7]
            wxdata[i] = replaced
            sans.log(item, replaced)
            continue
        possible_wind = sanitize_wind(item)
        # Accept the sanitized form only if it passes the wind check
        if is_wind(possible_wind):
            if item != possible_wind:
                sans.log(item, possible_wind)
                wxdata[i] = possible_wind

    # Strip extra characters before dedupe
    # Removes leading and trailing ".", "/" and "\" from every element
    stripped = [i.strip("./\\") for i in wxdata]
    if wxdata != stripped:
        sans.log_list(wxdata, stripped)
    # presumably only_neighbors=True limits removal to adjacent repeats — verify against dedupe
    deduped = dedupe(stripped, only_neighbors=True)
    # A shorter result means dedupe dropped at least one element
    if len(deduped) != len(wxdata):
        sans.duplicates_found = True
    return deduped
Provide sanitization for operations that work better when the report is a list.