avwx.parsing.sanitization.metar

METAR sanitization support.

 1"""METAR sanitization support."""
 2
 3# module
 4from avwx.parsing.sanitization.base import sanitize_list_with, sanitize_string_with
 5from avwx.parsing.sanitization.cleaners.base import CleanerListType
 6from avwx.parsing.sanitization.cleaners.cleaners import OnlySlashes, TrimWxCode
 7from avwx.parsing.sanitization.cleaners.joined import (
 8    JoinedCloud,
 9    JoinedRunwayVisibility,
10    JoinedTimestamp,
11    JoinedWind,
12)
13from avwx.parsing.sanitization.cleaners.remove import RemoveFromMetar
14from avwx.parsing.sanitization.cleaners.replace import CURRENT, ReplaceItem
15from avwx.parsing.sanitization.cleaners.separated import (
16    SeparatedAltimeterLetter,
17    SeparatedCloudAltitude,
18    SeparatedCloudQualifier,
19    SeparatedDistance,
20    SeparatedFirstTemperature,
21    SeparatedSecondTemperature,
22    SeparatedTemperatureTrailingDigit,
23    SeparatedWindUnit,
24)
25from avwx.parsing.sanitization.cleaners.visibility import RunwayVisibilityUnit, VisibilityGreaterThan
26from avwx.parsing.sanitization.cleaners.wind import (
27    DoubleGust,
28    EmptyWind,
29    MisplaceWindKT,
30    NonGGust,
31    RemoveVrbLeadingDigits,
32    WindLeadingMistype,
33)
34
# String-level replacement table for METAR reports: a copy of the shared
# CURRENT table plus one METAR-only entry that drops a stray slash found
# directly after a "Z".
METAR_REPL = dict(CURRENT)
METAR_REPL["Z/ "] = "Z "


# Sanitizer applied while the METAR is still a single string.
clean_metar_string = sanitize_string_with(METAR_REPL)


# Cleaner classes handed to the list sanitizer.
# Keep the sequence unchanged: presumably cleaners are tried in list order and
# a matching cleaner may stop later ones from running -- confirm against
# sanitize_list_with before reordering.
CLEANERS: CleanerListType = [
    # General element cleanup
    OnlySlashes,
    EmptyWind,
    TrimWxCode,
    # cleaners.separated
    SeparatedDistance,
    SeparatedFirstTemperature,
    SeparatedCloudAltitude,
    SeparatedSecondTemperature,
    SeparatedAltimeterLetter,
    SeparatedTemperatureTrailingDigit,
    SeparatedWindUnit,
    SeparatedCloudQualifier,
    # Removal, replacement, visibility and wind corrections
    RemoveFromMetar,
    ReplaceItem,
    VisibilityGreaterThan,
    MisplaceWindKT,
    RunwayVisibilityUnit,
    DoubleGust,
    WindLeadingMistype,
    NonGGust,
    RemoveVrbLeadingDigits,
    # cleaners.joined
    JoinedCloud,
    JoinedTimestamp,
    JoinedWind,
    JoinedRunwayVisibility,
    # NOTE(review): no cleaner classes are listed for "other wind fixes";
    # presumably those are still applied by the list sanitizer's own wind
    # pass rather than by an entry here -- confirm.
]

# Sanitizer applied once the METAR has been split into a list of elements.
clean_metar_list = sanitize_list_with(CLEANERS)
METAR_REPL = {'!': '1', '@': '2', '#': '3', '%': '5', '^': '6', '&': '7', '*': '8', '?': ' ', '"': '', "'": '', '`': '', '.': '', '(': ' ', ')': ' ', ';': ' ', 'MISSINGKT': '', ' 0I0': ' 090', 'NOSIGKT ': 'KT NOSIG ', 'KNOSIGT ': 'KT NOSIG ', '/VRB': ' VRB', 'CALMKT ': 'CALM ', 'CLMKT ': 'CALM ', 'CLRKT ': 'CALM ', ' <1/': ' M1/', '/04SM': '/4SM', '/4SSM': '/4SM', '/08SM': '/8SM', ' /34SM': '3/4SM', ' 3/SM': ' 3/4SM', 'PQ6SM ': 'P6SM ', 'P6000F ': 'P6000FT ', 'P6000FTQ ': 'P6000FT ', ' C A V O K ': ' CAVOK ', 'N0SIG': 'NOSIG', 'SCATTERED': 'SCT', 'BROKEN': 'BKN', 'OVERCAST': 'OVC', 'Z/ ': 'Z '}
def clean_metar_string(text: str, sans: avwx.structs.Sanitization) -> str:
25    def sanitize_report_string(text: str, sans: Sanitization) -> str:
26        """Provide sanitization for operations that work better when the report is a string."""
27        text = text.strip().upper().rstrip("=")
28        if len(text) < 4:
29            return text
30        # Standardize whitespace
31        text = " ".join(text.split())
32        # Prevent changes to station ID
33        stid, text = text[:4], text[4:]
34        # Replace invalid key-value pairs
35        for key, rep in replacements.items():
36            if key in text:
37                text = text.replace(key, rep)
38                sans.log(key, rep)
39        separated = separate_cloud_layers(text)
40        if text != separated:
41            sans.extra_spaces_needed = True
42        return stid + separated

Provide sanitization for operations that work better when the report is a string.

CLEANERS: list[type[avwx.parsing.sanitization.cleaners.base.CleanItem] | type[avwx.parsing.sanitization.cleaners.base.CleanPair] | type[avwx.parsing.sanitization.cleaners.base.RemoveItem] | type[avwx.parsing.sanitization.cleaners.base.SplitItem] | type[avwx.parsing.sanitization.cleaners.base.CombineItems]] = [<class 'avwx.parsing.sanitization.cleaners.cleaners.OnlySlashes'>, <class 'avwx.parsing.sanitization.cleaners.wind.EmptyWind'>, <class 'avwx.parsing.sanitization.cleaners.cleaners.TrimWxCode'>, <class 'avwx.parsing.sanitization.cleaners.separated.SeparatedDistance'>, <class 'avwx.parsing.sanitization.cleaners.separated.SeparatedFirstTemperature'>, <class 'avwx.parsing.sanitization.cleaners.separated.SeparatedCloudAltitude'>, <class 'avwx.parsing.sanitization.cleaners.separated.SeparatedSecondTemperature'>, <class 'avwx.parsing.sanitization.cleaners.separated.SeparatedAltimeterLetter'>, <class 'avwx.parsing.sanitization.cleaners.separated.SeparatedTemperatureTrailingDigit'>, <class 'avwx.parsing.sanitization.cleaners.separated.SeparatedWindUnit'>, <class 'avwx.parsing.sanitization.cleaners.separated.SeparatedCloudQualifier'>, <class 'avwx.parsing.sanitization.cleaners.remove.remove_items_in.<locals>.RemoveInList'>, <class 'avwx.parsing.sanitization.cleaners.replace.ReplaceItem'>, <class 'avwx.parsing.sanitization.cleaners.visibility.VisibilityGreaterThan'>, <class 'avwx.parsing.sanitization.cleaners.wind.MisplaceWindKT'>, <class 'avwx.parsing.sanitization.cleaners.visibility.RunwayVisibilityUnit'>, <class 'avwx.parsing.sanitization.cleaners.wind.DoubleGust'>, <class 'avwx.parsing.sanitization.cleaners.wind.WindLeadingMistype'>, <class 'avwx.parsing.sanitization.cleaners.wind.NonGGust'>, <class 'avwx.parsing.sanitization.cleaners.wind.RemoveVrbLeadingDigits'>, <class 'avwx.parsing.sanitization.cleaners.joined.JoinedCloud'>, <class 'avwx.parsing.sanitization.cleaners.joined.JoinedTimestamp'>, <class 'avwx.parsing.sanitization.cleaners.joined.JoinedWind'>, <class 
'avwx.parsing.sanitization.cleaners.joined.JoinedRunwayVisibility'>]
def clean_metar_list(wxdata: list[str], sans: avwx.structs.Sanitization) -> list[str]:
 53    def sanitize_report_list(wxdata: list[str], sans: Sanitization) -> list[str]:
 54        """Provide sanitization for operations that work better when the report is a list."""
 55        for i, item in reversed(list(enumerate(wxdata))):
 56            for cleaner in _cleaners:
 57                # TODO: Py3.10 change to match/case on type
 58                if isinstance(cleaner, CombineItems):
 59                    if i and cleaner.can_handle(wxdata[i - 1], item):
 60                        wxdata[i - 1] += wxdata.pop(i)
 61                        sans.extra_spaces_found = True
 62                        if cleaner.should_break:
 63                            break
 64                elif isinstance(cleaner, SplitItem):
 65                    if index := cleaner.split_at(item):
 66                        wxdata.insert(i + 1, item[index:])
 67                        wxdata[i] = item[:index]
 68                        sans.extra_spaces_needed = True
 69                        if cleaner.should_break:
 70                            break
 71                elif isinstance(cleaner, CleanPair):
 72                    if i and cleaner.can_handle(wxdata[i - 1], item):
 73                        clean_first, clean_second = cleaner.clean(wxdata[i - 1], item)
 74                        if wxdata[i - 1] != clean_first:
 75                            sans.log(wxdata[i - 1], clean_first)
 76                            wxdata[i - 1] = clean_first
 77                        if item != clean_second:
 78                            sans.log(item, clean_second)
 79                            wxdata[i] = clean_second
 80                            break
 81                elif cleaner.can_handle(item):
 82                    if isinstance(cleaner, RemoveItem):
 83                        sans.log(wxdata.pop(i))
 84                    elif isinstance(cleaner, CleanItem):
 85                        cleaned = cleaner.clean(item)
 86                        wxdata[i] = cleaned
 87                        sans.log(item, cleaned)
 88                    if cleaner.should_break:
 89                        break
 90
 91        # TODO: Replace with above syntax after testing?
 92        # May wish to keep since some elements could be checked after space needed...but so could the others?
 93
 94        # Check for wind sanitization
 95        for i, item in enumerate(wxdata):
 96            # Skip Station
 97            if i == 0:
 98                continue
 99            if is_variable_wind_direction(item):
100                replaced = item[:7]
101                wxdata[i] = replaced
102                sans.log(item, replaced)
103                continue
104            possible_wind = sanitize_wind(item)
105            if is_wind(possible_wind):
106                if item != possible_wind:
107                    sans.log(item, possible_wind)
108                wxdata[i] = possible_wind
109
110        # Strip extra characters before dedupe
111        stripped = [i.strip("./\\") for i in wxdata]
112        if wxdata != stripped:
113            sans.log_list(wxdata, stripped)
114        deduped = dedupe(stripped, only_neighbors=True)
115        if len(deduped) != len(wxdata):
116            sans.duplicates_found = True
117        return deduped

Provide sanitization for operations that work better when the report is a list.