Preface

DART is a data assimilation system developed by NCAR. It can be coupled directly with models such as CESM and WRF, and it can assimilate satellite observations such as MLS and SABER. This post records some of the pitfalls I ran into while using WACCM with DART.

DART_config

Run setup_hybrid in DART/models/cam-fv/shell_scripts/cesm2_1/; this creates the CESM case. It also creates DART_config in the caseroot, which acts as DART's activator: assimilation is only performed during the case run after DART_config has been executed. Otherwise the case runs as an ordinary CESM case.
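As a rough sketch, the whole sequence looks like this (/path/to/caseroot is a placeholder for the CASEROOT you choose inside setup_hybrid; adjust paths to your own tree):

```csh
cd DART/models/cam-fv/shell_scripts/cesm2_1
./setup_hybrid                  # creates and configures the CESM case
cd /path/to/caseroot            # placeholder: your own CASEROOT
chmod +x DART_config
./DART_config                   # enables DART for this case
```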

However, the automatically generated DART_config has a small bug:

if (-f ${DART_SCRIPTS_DIR}/compress.csh) then
   $COPY -f -${VERBOSE} ${DART_SCRIPTS_DIR}/compress.csh . || exit 43
else
   echo "ERROR: no compress.csh in ${DART_SCRIPTS_DIR}"
   exit 45
endif

The hyphen in front of ${VERBOSE} should be deleted, giving:

if (-f ${DART_SCRIPTS_DIR}/compress.csh) then
   $COPY -f ${VERBOSE} ${DART_SCRIPTS_DIR}/compress.csh . || exit 43
else
   echo "ERROR: no compress.csh in ${DART_SCRIPTS_DIR}"
   exit 45
endif
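If you would rather not open an editor, the same one-character fix can be applied with sed. The snippet below first recreates the buggy line in a scratch DART_config purely for demonstration; in a real case you would run only the sed command inside your CASEROOT (GNU sed's -i is assumed):

```shell
# Demonstration: reproduce the buggy line in a scratch file.
printf '$COPY -f -${VERBOSE} ${DART_SCRIPTS_DIR}/compress.csh . || exit 43\n' > DART_config
# Delete the stray '-' in front of ${VERBOSE}. Single quotes keep the
# shell from expanding ${VERBOSE} and ${DART_SCRIPTS_DIR} here.
sed -i 's/-f -${VERBOSE}/-f ${VERBOSE}/' DART_config
cat DART_config
```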
Then make DART_config executable and run it:
chmod +x DART_config
./DART_config

input.nml

After running DART_config, an input.nml file appears in the caseroot. It is similar to atm_in, but it is the namelist file that controls DART.

At this point DART has been activated; ./xmlquery -p ASSIMI shows the relevant settings:

DATA_ASSIMILATION: ['CPL:FALSE', 'ATM:TRUE', 'LND:FALSE', 'ICE:FALSE', 'OCN:FALSE', 'ROF:FALSE', 'GLC:FALSE', 'WAV:FALSE']
DATA_ASSIMILATION_CYCLES: 1
DATA_ASSIMILATION_SCRIPT: /public/home/elzd_2024_000125/CESM/cases/DART/SSW2013/FWSD.2013.70Lev.DART/no_assimilate.csh

The output above shows that only the atmosphere component will be assimilated, and that the run performs a single assimilation cycle and then stops (by default, one assimilation every 6 hours of model time). The last entry, DATA_ASSIMILATION_SCRIPT=no_assimilate.csh, shows that the no-assimilation script no_assimilate.csh is in use. That is a test script: running the case now still performs no assimilation. You need to run

./xmlchange DATA_ASSIMILATION_SCRIPT="assimilation.csh"

to switch to assimilation.csh, which is the script that actually performs the assimilation.
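Related: each job performs DATA_ASSIMILATION_CYCLES model-advance/assimilation cycles. If you want several 6-hour cycles per submission, the XML variables can be raised; the numbers below are only an illustration (20 six-hour cycles, i.e. 5 days per job), so check them against your own configuration:

```csh
./xmlchange DATA_ASSIMILATION_CYCLES=20
./xmlchange STOP_OPTION=nhours,STOP_N=6
./xmlquery -p ASSIMI        # confirm the new settings
```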

Back to input.nml.

A few settings need to be changed:

! This namelist is set up for a single, CAM-FV, assimilation cycle
! using the default values as found in model_mod.f90 and
! shell_scripts/cesm2_1/setup_{hybrid,advanced}
! starting from a single model state, which must be perturbed into an ensemble.
! Comments below give suggestions for setting it up for other assimilations:
! > continuing an assimilation (no perturbations) and/or starting from an ensemble.
! > Setting up a WACCM(-X) assimilation
! > Setting up for perfect_model_obs
!
! PLEASE READ
!
! cam-fv/model_mod.html
!
! for recommendations on namelist settings for CAM.

! ens_size, num_output_* will be (re)set by the setup script
! To use a pre-existing ensemble, make the following changes
! This applies to the second cycle after starting from a single ensemble member.
! It's not necessary to change any other variables controlling the perturbation
! because this will cause them to be ignored.
! perturb_from_single_instance = .false.
! Other variables of interest
! stages_to_write Controls diagnostic and restart output. Valid values are
! 'input', 'forecast','preassim', 'postassim', 'analysis', and 'output'.
! If only prior inflation is used, then 'postassim' and 'analysis'
! are redundant with 'output'. Just use 'output'.
! If only posterior inflation is used, 'forecast' and 'preassim'
! are redundant with 'input'.
! If you want input_mean and input_sd, you'll
! need to set output_mean and output_sd = .true.
! (and include 'input' in stages_to_write).
! inf_initial_from_restart These should be true because assimilate.csh will create
! inf_sd_from_restart inflation restart files from the values in inf*_initial
! if needed.

&probit_transform_nml
/

&algorithm_info_nml
qceff_table_filename = ''
/

&filter_nml
input_state_file_list = 'cam_init_files'
input_state_files = ''
single_file_in = .false.
perturb_from_single_instance = .false.
init_time_days = -1
init_time_seconds = -1

! Change this to 'output'
stages_to_write = 'output'

output_state_files = ''
output_state_file_list = 'cam_init_files'
output_mean = .true.
output_sd = .true.
output_members = .true.

! Same as the ensemble size
num_output_state_members = 5

single_file_out = .false.
write_all_stages_at_end = .false.
output_interval = 1

! Same as the ensemble size
ens_size = 5

num_groups = 1
distributed_state = .true.

! Set to 2 or 5; see the official documentation for details
inf_flavor = 2, 0

! Set to .true., .false.; the first column is for prior inflation, the second for posterior
inf_initial_from_restart = .true., .false.

! Mainly change the first column: the inflation coefficient, somewhat larger than 1
inf_initial = 1.2, 1.0

! Lower and upper bounds of the inflation
inf_lower_bound = 1.02, 0.0
inf_upper_bound = 100.0, 100.0

! Modify the following; see the official documentation for details
inf_sd_initial_from_restart = .true., .false.
inf_sd_initial = 0.6, 0.6
inf_sd_lower_bound = 0.6, 0.6
inf_sd_max_change = 1.05, 1.05
inf_damping = 0.9, 0.0
inf_deterministic = .true., .true.

obs_sequence_in_name = 'obs_seq.out'
obs_sequence_out_name = 'obs_seq.final'

! Same as the ensemble size
num_output_obs_members = 5

compute_posterior = .true.

trace_execution = .true.
output_timestamps = .true.
output_forward_op_errors = .false.
silence = .false.
/
! Moha's enhanced (gamma distribution) adaptive:
! inf_flavor = 5, 0
! inf_lower_bound = 0.0, 0.0
! flavor 2
! inf_flavor = 2, 0
! inf_lower_bound = 1.0, 1.0


! Not used in CAM assims
first_obs_days = -1
first_obs_seconds = -1
last_obs_days = -1
last_obs_seconds = -1
obs_window_days = -1
obs_window_seconds = -1
adv_ens_command = 'no_CESM_advance_script'
tasks_per_model_advance = -1     ! Used only for models run inside filter.
write_obs_every_cycle = .false.  ! intended for debugging when cycling inside filter.

&perfect_model_obs_nml
read_input_state_from_file = .true.
input_state_files = "caminput.nc"
init_time_days = -1
init_time_seconds = -1

write_output_state_to_file = .true.
output_state_files = "perfect_restart.nc"

obs_seq_in_file_name = "obs_seq.in"
obs_seq_out_file_name = "obs_seq.out"
first_obs_days = -1
first_obs_seconds = -1
last_obs_days = -1
last_obs_seconds = -1

trace_execution = .true.
output_timestamps = .true.
print_every_nth_obs = 0
output_forward_op_errors = .false.
/

#========================================================================
# Start of CAM-FV dependencies and general discussion.
#========================================================================
!
! Creation of initial ensemble from a single model state.
! fields_to_perturb lists the DART QTY_s of the state variables to be perturbed to make the ensemble.
! perturbation_amplitude > 0 allows each point of the fields_to_perturb fields of each ens member
! to be randomly perturbed with a standard deviation of perturbation_amplitude.
! Each field can be given a different perturbation_amplitude.
! Used by filter's call to pert_model_copies.
!
! state_variables (5 columns for each variable):
! netcdf varname, dart quantity, min allowed value, max allowed value, (no)update this var
!
! vert_normalization_YYY
! The vert_normalization_scale_height default value was chosen based on
! Pedatella's settling on 1.5 (scale heights/radian), based on tuning experiments.
! This is supported by tuning experiments with CAM5.
!
! use_log_vertical_scale(vertical interpolation only):
! .false. or .true.
!
! no_obs_assim_above_level
! Prevents assimilation of observations whose vertical location is above
! this model level. Note that, if this value is set to a large value,
! it may be within CAM's hybrid coordinate layers instead of in the pure pressure layers.
! This will result in the the observation cutoff height being at different pressure levels
! over mountains versus lower areas.

! model_damping_ends_at_level
! This controls how much innovations are reduced near the model top, to mitigate the effects
! of the extra diffusion sometimes applied there in CAM and WACCM (see fv_div24del2flag).
! The default value (-1) turns off the damping and relies on the choices of the following
! variables to prevent assimilation from happening in CAM's diffusive top layers:
! no_obs_assim_above_level,
! use_log_vertical_scale,
! vert_normalization_YYY,
! cutoff.
! When it is turned on (> 0), it is the lowest level which will be damped.
! Damping increases with height (smaller level numbers).
! The values given below are the minimums recommended for various models.
! You can start with the minimum and increase it if there seems to be excessive
! noise in the upper layers.
!
! CAM-FV Section
! Model top 220 Pa
! Number of CAM model top levels with extra diffusion, controlled by div24del2:
! 2 = div2 -> 2 levels
! 4,24 = del2 -> 3 levels
! CAM assimilations can use pressure or scale height vertical coordinate.
! We recommend scale height.
! use_log_vertical_scale = .true.
! vert_normalization_scale_height = 1.5
! vert_normalization_pressure = 20000.
!
! 26 levels (CAM4):
! no_obs_assim_above_level = 5 ! corresponds to ~3700 Pa
! 30 levels (CAM5):
! no_obs_assim_above_level = 5 ! corresponds to ~3800 Pa
! 32 levels (CAM6):
! no_obs_assim_above_level = 5 ! corresponds to ~3600 Pa
!
! WACCM
! The model top for WACCM is (naturally) much higher: 4.5e-4 Pa
! The number of model top levels with extra diffusion is controlled by WACCM's
! fv_div24del2flag:
! 2 = div2 -> 3 levels
! 4,24 = del2 -> 4 levels
! 70 levels (WACCM4):
! no_obs_assim_above_level = 7 ! corresponds to 0.012 Pa
! This values must be used with WACCM assimilations;
! use_log_vertical_scale = .true.
! This is recommended, but your own tuning experiments may support a different value.
! vert_normalization_scale_height = 1.5
!
!========================================================================

&model_nml
cam_template_filename = 'caminput.nc'
cam_phis_filename = 'cam_phis.nc'
custom_routine_to_generate_ensemble = .false.

! No perturbation here: I never managed to get DART to add the perturbations
! itself. Add the perturbations to the initial files manually beforehand.
fields_to_perturb = ''


perturbation_amplitude = 0.1
state_variables = 'T', 'QTY_TEMPERATURE', 'NA', 'NA', 'UPDATE'
'US', 'QTY_U_WIND_COMPONENT', 'NA', 'NA', 'UPDATE'
'VS', 'QTY_V_WIND_COMPONENT', 'NA', 'NA', 'UPDATE'
'Q', 'QTY_SPECIFIC_HUMIDITY', 'NA', 'NA', 'UPDATE'
'CLDLIQ','QTY_CLOUD_LIQUID_WATER', 'NA', 'NA', 'UPDATE'
'CLDICE','QTY_CLOUD_ICE', 'NA', 'NA', 'UPDATE'
'PS', 'QTY_SURFACE_PRESSURE', 'NA', 'NA', 'UPDATE'
use_log_vertical_scale = .true.

! Set this to .true.
use_variable_mean_mass = .true.
no_normalization_of_scale_heights = .true.
vertical_localization_coord = 'SCALEHEIGHT'
no_obs_assim_above_level = 5
model_damping_ends_at_level = -1
using_chemistry = .false.
assimilation_period_days = 0
assimilation_period_seconds = 21600
suppress_grid_info_in_output = .false.
debug_level = 0
/

! Other fields in the CAM initial file, which could be included in the model state:
! These QTYs should be changed to physically meaningful values before any real assim.
! 'DMS', 'QTY_3D_PARAMETER', 'NA', 'NA', 'UPDATE'
! 'H2O2', 'QTY_3D_PARAMETER', 'NA', 'NA', 'UPDATE'
! 'H2SO4', 'QTY_3D_PARAMETER', 'NA', 'NA', 'UPDATE'
! 'NUMICE','QTY_3D_PARAMETER', 'NA', 'NA', 'UPDATE'
! 'NUMLIQ','QTY_3D_PARAMETER', 'NA', 'NA', 'UPDATE'
! 'NUMRAI','QTY_3D_PARAMETER', 'NA', 'NA', 'UPDATE'
! 'NUMSNO','QTY_3D_PARAMETER', 'NA', 'NA', 'UPDATE'
! 'RAINQM','QTY_3D_PARAMETER', 'NA', 'NA', 'UPDATE'
! 'SNOWQM','QTY_3D_PARAMETER', 'NA', 'NA', 'UPDATE'
! 'SO2', 'QTY_3D_PARAMETER', 'NA', 'NA', 'UPDATE'
! 'SOAG', 'QTY_3D_PARAMETER', 'NA', 'NA', 'UPDATE'
! 'bc_a1', 'QTY_3D_PARAMETER', 'NA', 'NA', 'UPDATE'
! 'bc_a4', 'QTY_3D_PARAMETER', 'NA', 'NA', 'UPDATE'
! 'dst_a1','QTY_3D_PARAMETER', 'NA', 'NA', 'UPDATE'
! 'dst_a2','QTY_3D_PARAMETER', 'NA', 'NA', 'UPDATE'
! 'dst_a3','QTY_3D_PARAMETER', 'NA', 'NA', 'UPDATE'
! 'ncl_a1','QTY_3D_PARAMETER', 'NA', 'NA', 'UPDATE'
! 'ncl_a2','QTY_3D_PARAMETER', 'NA', 'NA', 'UPDATE'
! 'ncl_a3','QTY_3D_PARAMETER', 'NA', 'NA', 'UPDATE'
! 'num_a1','QTY_3D_PARAMETER', 'NA', 'NA', 'UPDATE'
! 'num_a2','QTY_3D_PARAMETER', 'NA', 'NA', 'UPDATE'
! 'num_a3','QTY_3D_PARAMETER', 'NA', 'NA', 'UPDATE'
! 'num_a4','QTY_3D_PARAMETER', 'NA', 'NA', 'UPDATE'
! 'pom_a1','QTY_3D_PARAMETER', 'NA', 'NA', 'UPDATE'
! 'pom_a4','QTY_3D_PARAMETER', 'NA', 'NA', 'UPDATE'
! 'so4_a1','QTY_3D_PARAMETER', 'NA', 'NA', 'UPDATE'
! 'so4_a2','QTY_3D_PARAMETER', 'NA', 'NA', 'UPDATE'
! 'so4_a3','QTY_3D_PARAMETER', 'NA', 'NA', 'UPDATE'
! 'soa_a1','QTY_3D_PARAMETER', 'NA', 'NA', 'UPDATE'
! 'soa_a2','QTY_3D_PARAMETER', 'NA', 'NA', 'UPDATE'
&location_nml
horiz_dist_only = .false.
vert_normalization_pressure = 20000.0
vert_normalization_height = 10000.0
vert_normalization_level = 20.0
vert_normalization_scale_height = 1.5
approximate_distance = .true.
nlon = 141
nlat = 72
output_box_info = .false.
print_box_level = 0
/

#========================================================================
# End of CAM-FV dependencies.
#========================================================================

&fill_inflation_restart_nml
write_prior_inf = .true.

! Inflation coefficient; somewhat larger than 1
prior_inf_mean = 1.2
prior_inf_sd = 0.6

write_post_inf = .false.
post_inf_mean = 1.00
post_inf_sd = 0.6

input_state_files = 'caminput.nc'
single_file = .false.

verbose = .false.
/

! to use chemistry or saber temperatures, include the following below.
! '../../../observations/forward_operators/obs_def_CO_Nadir_mod.f90',
! '../../../observations/forward_operators/obs_def_SABER_mod.f90',
! '../../../observations/forward_operators/obs_def_MOPITT_CO_mod.f90',

&preprocess_nml
overwrite_output = .true.
input_obs_qty_mod_file = '../../../assimilation_code/modules/observations/DEFAULT_obs_kind_mod.F90'
output_obs_qty_mod_file = '../../../assimilation_code/modules/observations/obs_kind_mod.f90'
input_obs_def_mod_file = '../../../observations/forward_operators/DEFAULT_obs_def_mod.F90'
output_obs_def_mod_file = '../../../observations/forward_operators/obs_def_mod.f90'
obs_type_files = '../../../observations/forward_operators/obs_def_gps_mod.f90',
'../../../observations/forward_operators/obs_def_upper_atm_mod.f90',
'../../../observations/forward_operators/obs_def_reanalysis_bufr_mod.f90',
'../../../observations/forward_operators/obs_def_altimeter_mod.f90'
quantity_files = '../../../assimilation_code/modules/observations/atmosphere_quantities_mod.f90',
'../../../assimilation_code/modules/observations/space_quantities_mod.f90',
'../../../assimilation_code/modules/observations/chemistry_quantities_mod.f90'
/

! Not usually assimilated. No fundamental reason not to.
! 'RADIOSONDE_SPECIFIC_HUMIDITY',
! Available from mid-2006 onward. Build filter with obs_def_gps_mod.f90
! 'GPSRO_REFRACTIVITY',
! WACCM can use higher observations than CAM.
! An example can be included via obs_def_SABER_mod.f90.
! 'SABER_TEMPERATURE',

&obs_kind_nml
assimilate_these_obs_types = 'AURAMLS_TEMPERATURE'


! assimilate_these_obs_types = 'RADIOSONDE_U_WIND_COMPONENT',
! 'RADIOSONDE_V_WIND_COMPONENT',
! 'RADIOSONDE_TEMPERATURE',
! 'AIRCRAFT_U_WIND_COMPONENT',
! 'AIRCRAFT_V_WIND_COMPONENT',
! 'AIRCRAFT_TEMPERATURE',
! 'ACARS_U_WIND_COMPONENT',
! 'ACARS_V_WIND_COMPONENT',
! 'ACARS_TEMPERATURE',
! 'SAT_U_WIND_COMPONENT',
! 'SAT_V_WIND_COMPONENT',
! 'GPSRO_REFRACTIVITY'

! evaluate_these_obs_types = 'RADIOSONDE_SPECIFIC_HUMIDITY',
/


&state_vector_io_nml
buffer_state_io = .false.
single_precision_output = .false.
/


! 'layout' and 'tasks_per_node' will be reset by the assimilate.csh script
! to match the number used when laying out the job.

&ensemble_manager_nml
layout = 2
tasks_per_node = 16
/


&assim_tools_nml
cutoff = 0.15
sort_obs_inc = .false.
spread_restoration = .false.
sampling_error_correction = .true.
adaptive_localization_threshold = -1
output_localization_diagnostics = .false.
localization_diagnostics_file = 'localization_diagnostics'
convert_all_obs_verticals_first = .true.
convert_all_state_verticals_first = .true.
print_every_nth_obs = 10000
distribute_mean = .false.
/


&cov_cutoff_nml
select_localization = 1
/


&reg_factor_nml
select_regression = 1
input_reg_file = 'time_mean_reg'
save_reg_diagnostics = .false.
reg_diagnostics_file = 'reg_diagnostics'
/


&obs_sequence_nml
write_binary_obs_sequence = .false.
/


&quality_control_nml
input_qc_threshold = 3.0
outlier_threshold = 3.0
enable_special_outlier_code = .false.
/


&xyz_location_nml
/


! : error codes >= TERMLEVEL will cause termination
! E_DBG = -2, E_MSG = -1, E_ALLMSG = 0, E_WARN = 1, E_ERR = 2
! write_nml default is 'file'.
! write_nml = 'none' reduces printed output.
&utilities_nml
TERMLEVEL = 2
module_details = .false.
logfilename = 'dart_log.out'
nmlfilename = 'dart_log.nml'
/


&mpi_utilities_nml
/


&obs_def_gps_nml
max_gpsro_obs = 15000000
/


#========================================================================
# observation manipulation tools
#========================================================================

! other possible obs tool namelist items:
!
! keep only the U and V radiosonde winds:
! obs_types = 'RADIOSONDE_U_WIND_COMPONENT'
! 'RADIOSONDE_V_WIND_COMPONENT'
! keep_types = .true.
!
! remove the U and V radiosonde winds:
! obs_types = 'RADIOSONDE_U_WIND_COMPONENT'
! 'RADIOSONDE_V_WIND_COMPONENT'
! keep_types = .false.
!
! keep only observations with a DART QC of 0:
! qc_metadata = 'Dart quality control'
! min_qc = 0
! max_qc = 0
!
! keep only radiosonde temp obs between 250 and 300 K:
! copy_metadata = 'NCEP BUFR observation'
! copy_type = 'RADIOSONDE_TEMPERATURE'
! min_copy = 250.0
! max_copy = 300.0


&obs_sequence_tool_nml
num_input_files = 2
filename_seq = 'obs_seq.one', 'obs_seq.two'
filename_out = 'obs_seq.processed'
first_obs_days = -1
first_obs_seconds = -1
last_obs_days = -1
last_obs_seconds = -1
min_lat = -90.0
max_lat = 90.0
min_lon = 0.0
max_lon = 360.0
gregorian_cal = .true.
print_only = .false.
/


&obs_common_subset_nml
num_to_compare_at_once = 2
filename_seq = ''
filename_seq_list = ''
filename_out_suffix = '.common'
print_only = .false.
print_every = 10000
calendar = 'Gregorian'
dart_qc_threshold = 3
eval_and_assim_can_match = .false.
/


&obs_impact_tool_nml
input_filename = 'cross_correlations.txt'
output_filename = 'control_impact_runtime.txt'
debug = .false.
/




#========================================================================
# diagnostic tools
#========================================================================

! The times in the namelist for the obs_diag program are vectors
! that follow the following sequence:
! year month day hour minute second
! max_num_bins can be used to specify a fixed number of bins,
! in which case last_bin_center should be safely in the future.
!
! Acceptable latitudes range from [-90, 90]
! Acceptable longitudes range from [ 0, Inf]
!
! Other available namelist variables, not in the default obs_diag.nml:
! hlevel
! mlevel
! print_obs_locations
! outliers_in_histogram
! plevel_edges
! hlevel_edges
! mlevel_edges
! Standard layers:
! 1000, 925, 850, 700, 600, 500, 400, 300, 250, 200, 150, 100, 70, 50, 30, 20, 10 hPa
! +950(MetOffc) -600(skipped in obs_diag.f90 defaults) -70 and less skipped in obs_diag.f90
! Corresponding heights (assuming a standard atmosphere)
! 200, 650, 1350, 2900,4100,5480,7090,9080,10280,11700,13520,16100,18358,21060,24640,27480,32330
! I've changed the height vertical axis in plot_rmse_xxx* to be logarithmic
! in order to make the layers look more like the pressure layers.
! So the bottom edge can't be 0.
! The lowest GPS ob is 200, so that's the new lowest edge
!
! plevel = 1000.,925.,850.,700.,500.,400.,300.,250.,200.,150.,100.,50.,20.,10.
! hlevel = 1000., 2000., 3000., 4000., 5000., 6000., 7000., 8000., 9000., 10000.,11000.
! 0, 1500, 2500, 3500, 4500, 5500, 6500, 7500, 9500, 11500, 13500, 15500
!
! Defaults
! plevel = 1000.,850.,700.,500.,400.,300.,200.,150.,100.
! Nregions = 4
! lonlim1 = 0.0, 0.0, 0.0, 235.0
! lonlim2 = 360.0, 360.0, 360.0, 295.0
! latlim1 = 20.0, -80.0, -20.0, 25.0
! latlim2 = 80.0, -20.0, 20.0, 55.0
! reg_names = 'Northern Hemisphere', 'Southern Hemisphere', 'Tropics', 'North America'
!
! for WACCM you will want to change the plevel to match
! the higher vertical range of the model.
! plevel = 1000.,850.,700.,500.,400.,300.,200.,150.,100.
! these are specified in hectopascals (hPa)

&obs_diag_nml
obs_sequence_name = 'obs_seq.final'
obs_sequence_list = ''
first_bin_center = BOGUS_YEAR, 1, 1, 0, 0, 0
last_bin_center = BOGUS_YEAR, 1, 2, 0, 0, 0
bin_separation = 0, 0, 0, 6, 0, 0
bin_width = 0, 0, 0, 6, 0, 0
time_to_skip = 0, 0, 1, 0, 0, 0
max_num_bins = 1000
trusted_obs = 'null'
plevel_edges = 1035.5, 962.5, 887.5, 775, 600, 450, 350, 275, 225, 175, 125, 75, 35, 15, 2
hlevel_edges = 200, 630, 930, 1880,3670,5680,7440,9130,10530,12290, 14650,18220,23560,29490,43000
Nregions = 3
reg_names = 'Northern Hemisphere', 'Tropics', 'Southern Hemisphere'
lonlim1 = 0.0, 0.0, 0.0
lonlim2 = 360.0, 360.0, 360.0
latlim1 = 20.0, -20.0, -90.0
latlim2 = 90.0, 20.0, -20.0
print_mismatched_locs = .false.
create_rank_histogram = .true.
outliers_in_histogram = .true.
use_zero_error_obs = .false.
verbose = .false.
/


&schedule_nml
calendar = 'Gregorian'
first_bin_start = 1601, 1, 1, 0, 0, 0
first_bin_end = 2999, 1, 1, 0, 0, 0
last_bin_end = 2999, 1, 1, 0, 0, 0
bin_interval_days = 1000000
bin_interval_seconds = 0
max_num_bins = 1000
print_table = .true.
/


&obs_seq_to_netcdf_nml
obs_sequence_name = 'obs_seq.final'
obs_sequence_list = ''
append_to_netcdf = .false.
lonlim1 = 0.0
lonlim2 = 360.0
latlim1 = -90.0
latlim2 = 90.0
verbose = .false.
/


&model_mod_check_nml
input_state_files = 'caminput.nc'
output_state_files = 'mmc_output.nc'
test1thru = 0
run_tests = 1,2,3,4,5,7
x_ind = 175001

quantity_of_interest = 'QTY_U_WIND_COMPONENT'
loc_of_interest = 254.727854, 39.9768545, 50000.0

interp_test_lonrange = 0.0, 360.0
interp_test_dlon = 1.0
interp_test_latrange = -90.0, 90.0
interp_test_dlat = 1.0
interp_test_vertrange = 10000.0, 90000.0
interp_test_dvert = 10000.0
interp_test_vertcoord = 'VERTISPRESSURE'
verbose = .false.
/


! different methods to compute 'distance' from mean:
! 1 = simple absolute difference
! 2 = normalized absolute difference
! 3 = simple rmse difference
! 4 = normalized rmse difference

&closest_member_tool_nml
input_restart_file_list = 'cam_in.txt'
output_file_name = 'closest_restart'
! Same as the ensemble size
ens_size = 5
single_restart_file_in = .false.
difference_method = 4
use_only_qtys = ''
/


&perturb_single_instance_nml
! Same as the ensemble size
ens_size = 5
input_files = 'caminput.nc'
output_files = 'cam_pert1.nc','cam_pert2.nc','cam_pert3.nc'
output_file_list = ''
perturbation_amplitude = 0.2
/


&quad_interpolate_nml
debug = 0
/

&cam_dart_obs_preprocessor_nml
filename_in = 'obs_seq.out'
filename_out = 'obs_seq.nohighobs'
verbose = .false.
calendar = 'Gregorian'
print_every = 5000
/
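Because the ensemble size appears in several namelists (ens_size, num_output_state_members, num_output_obs_members, and again in closest_member_tool_nml and perturb_single_instance_nml), a quick grep helps confirm they all agree. A sketch; the printf lines only build a minimal stand-in for input.nml for demonstration, and in a real case you would run just the grep against the file in your CASEROOT:

```shell
# Minimal stand-in for input.nml, for demonstration only.
printf '   ens_size                     = 5\n   num_output_state_members     = 5\n   num_output_obs_members       = 5\n' > input.nml
# List every ensemble-size-related entry so mismatches stand out.
grep -E 'ens_size|num_output_(state|obs)_members' input.nml
```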



assimilation.csh

This is the script that actually carries out the assimilation; several things in it need to be changed.

First, before any of the commands run, add

set nonomatch

This stops csh from aborting with a "No match" error when a wildcard used by commands such as ls or rm matches no files.
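The effect, sketched as a csh fragment (the obs_seq.* filenames are made up):

```csh
# Without nonomatch, csh itself rejects an unmatched glob with
# "No match." and a non-interactive script dies on the spot:
#    rm obs_seq.*
# With nonomatch, the unmatched pattern is passed to rm literally;
# rm complains about a nonexistent file, but the script keeps going:
set nonomatch
rm obs_seq.*
```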

I later found that far too much needed changing, so the complete assimilation.csh is posted below.

#!/bin/csh
#
# DART software - Copyright UCAR. This open source software is provided
# by UCAR, "as is", without charge, subject to all terms of use at
# http://www.image.ucar.edu/DAReS/DART/DART_download
#
# DART $Id$

# ------------------------------------------------------------------------------
# Purpose: assimilate with a CAM ensemble and perform advanced archiving
# and compression in support of multiple assimilation cycles in a
# single CESM job.
#
# The (resulting) assimilate.csh script is called by CESM with two arguments:
# 1) the CASEROOT, and
# 2) the assimilation cycle number in this CESM job
# ------------------------------------------------------------------------------
# This template is lightly modified by the setup scripts to be appropriate
# for specific hardware and other configurations. The modified result is
# then given execute permission and is appropriate to use for an assimilation.
# All of this is automatically performed by the DART-supplied setup scripts.
#
# Tag DART's state output with names using CESM's convention:
# ${case}.${scomp}[_$inst].${filetype}[.$dart_file].${date}.nc
# These should all be named with $scomp = "cam" to distinguish
# them from the same output from other components in multi-component assims.
#
# This script also has logic in it to manage disk space in a way that allows
# for more assimilation cycles to be performed before archiving without losing
# critical restart capability. The same logic is also useful for assimilations
# that may require multiple timesteps to be available.
#
# As a specific example, consider the case when 3 assimilation cycles have been
# performed: 6Z, 12Z, 18Z.
# If we want to keep a restart set and a backup
# restart set, we only need the 18Z and 12Z, so the 6Z set can be removed.
# Let's also say that it's the last cycle of the job - which automatically kicks off
# the short-term archiver. If we did 'nothing', the 12Z and 18Z get archived
# and the 18Z gets restaged.

# machine-specific dereferencing
set nonomatch

if ($?SLURM_JOB_ID) then

   # SLURM environment variables:
   # env | grep SLURM | sort

   setenv ORIGINALDIR $SLURM_SUBMIT_DIR
   setenv JOBNAME     $SLURM_JOB_NAME
   setenv JOBID       $SLURM_JOBID
   setenv MYQUEUE     $SLURM_JOB_PARTITION
   setenv NODENAMES   $SLURM_NODELIST
   setenv LAUNCHCMD   "mpirun -np $SLURM_NTASKS -bind-to core"
   # untested method for determining NUMTASKS_PERNODE with SLURM
   # set ANY_OLD_NODE = `head -n 1 $SLURM_NODELIST`
   # setenv NUMTASKS_PERNODE `grep $ANY_OLD_NODE $SLURM_NODELIST | wc -l`

else if ($?PBS_NODEFILE) then

   # PBS environment variables:
   # env | grep PBS | sort

   setenv ORIGINALDIR $PBS_O_WORKDIR
   setenv JOBNAME     $PBS_JOBNAME
   setenv JOBID       $PBS_JOBID
   setenv MYQUEUE     $PBS_O_QUEUE
   setenv NUMCPUS     $NCPUS
   setenv NUMTASKS    `cat  $PBS_NODEFILE | wc -l`
   setenv NUMNODES    `uniq $PBS_NODEFILE | wc -l`
   set ANY_OLD_NODE = `head -n 1 $PBS_NODEFILE`
   setenv NUMTASKS_PERNODE `grep $ANY_OLD_NODE $PBS_NODEFILE | wc -l`
   setenv MPIEXEC_MPT_DEBUG 0
   setenv MP_DEBUG_NOTIMEOUT yes
   setenv LAUNCHCMD mpiexec_mpt

   echo "jobname        : $JOBNAME"
   echo "numcpus        : $NUMCPUS"
   echo "numtasks       : $NUMTASKS"
   echo "numnodes       : $NUMNODES"
   echo "tasks_per_node : $NUMTASKS_PERNODE"
   echo " "

else if ($?LSB_HOSTS) then

   # LSF environment variables:
   # env | grep LS | grep -v LS_COLORS | sort

   setenv ORIGINALDIR $LS_SUBCWD
   setenv JOBNAME     $LSB_OUTPUTFILE:ar
   setenv JOBID       $LSB_JOBID
   setenv MYQUEUE     $LSB_QUEUE
   setenv NODENAMES   ${LSB_HOSTS}
   setenv MP_DEBUG_NOTIMEOUT yes
   setenv LAUNCHCMD   mpirun.lsf
   # untested method for determining NUMTASKS_PERNODE with LSF
   # setenv NUMTASKS_PERNODE \
   #    `echo $LSB_SUB_RES_REQ | sed -ne '/ptile/s#.*\[ptile=\([0-9][0-9]*\)]#\1#p'`

endif

# ==============================================================================
# Block 0: Set command environment
# ==============================================================================
# This block is an attempt to localize all the machine-specific
# changes to this script such that the same script can be used
# on multiple platforms. This will help us maintain the script.

echo "`date` -- BEGIN CAM_ASSIMILATE"

set nonomatch # suppress "rm" warnings if wildcard does not match anything

setenv CASEROOT $1

# CESM uses C indexing on loops; cycle = [0,....,$DATA_ASSIMILATION_CYCLES - 1]
# "Fix" that here, so the rest of the script isn't confusing.

@ cycle = $2 + 1

cd ${CASEROOT}

setenv scomp `./xmlquery COMP_ATM --value`
setenv CASE `./xmlquery CASE --value`
setenv ensemble_size `./xmlquery NINST_ATM --value`
setenv CAM_DYCORE `./xmlquery CAM_DYCORE --value`
setenv EXEROOT `./xmlquery EXEROOT --value`
setenv RUNDIR `./xmlquery RUNDIR --value`
setenv archive `./xmlquery DOUT_S_ROOT --value`
setenv TOTALPES `./xmlquery TOTALPES --value`
setenv CONT_RUN `./xmlquery CONTINUE_RUN --value`
setenv CHECK_TIMING `./xmlquery CHECK_TIMING --value`
setenv DATA_ASSIMILATION_CYCLES `./xmlquery DATA_ASSIMILATION_CYCLES --value`

# Switch CESM's timer script off for the rest of the forecasts of this job.
# The timer takes a significant amount of time, which grows by ~15 s
# for each cycle. This can double the cycle time in a 2 week job.

./xmlchange CHECK_TIMING=FALSE

cd ${RUNDIR}

# A switch to save all the inflation files
setenv save_all_inf TRUE

# This may be needed before the short-term archiver has been run.
if (! -d ${archive}/esp/hist) mkdir -p ${archive}/esp/hist

# If they exist, mean and sd will always be saved.
# A switch to signal how often to save the stages' ensemble members.
# valid values are: NONE, RESTART_TIMES, ALL
setenv save_stages_freq RESTART_TIMES

# This next line ultimately specifies the location of the observation files.
set BASEOBSDIR = /data2/share/elzd_2024_000125/xhw/downloadData/MLS/2013/Level2/DART_seq

# suppress "rm" warnings if wildcard does not match anything
set nonomatch

# Make sure that this script is using standard system commands
# instead of aliases defined by the user.
# If the standard commands are not in the location listed below,
# change the 'set' commands to use them.
# The VERBOSE options are useful for debugging, but are optional because
# some systems don't like the -v option to any of the following.

set MOVE = '/usr/bin/mv -v'
set COPY = '/usr/bin/cp -v --preserve=timestamps'
set LINK = '/usr/bin/ln -s'
set LIST = '/usr/bin/ls '
set REMOVE = '/usr/bin/rm -r'

# ==============================================================================
# Block 1: Determine time of current model state from file name of member 1
# These are of the form "${CASE}.cam_${ensemble_member}.i.2000-01-06-00000.nc"
# ==============================================================================

# Piping stuff through 'bc' strips off any preceding zeros.

set FILE = `head -n 1 rpointer.atm_0001`
set FILE = $FILE:r
set ATM_DATE_EXT = $FILE:e
set ATM_DATE = `echo $FILE:e | sed -e "s#-# #g"`
set ATM_YEAR = `echo $ATM_DATE[1] | bc`
set ATM_MONTH = `echo $ATM_DATE[2] | bc`
set ATM_DAY = `echo $ATM_DATE[3] | bc`
set ATM_SECONDS = `echo $ATM_DATE[4] | bc`
set ATM_HOUR = `echo $ATM_DATE[4] / 3600 | bc`

echo "valid time of model is $ATM_YEAR $ATM_MONTH $ATM_DAY $ATM_SECONDS (seconds)"
echo "valid time of model is $ATM_YEAR $ATM_MONTH $ATM_DAY $ATM_HOUR (hours)"
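# Example of the csh modifiers used above, for an illustrative pointer entry
# "case.cam_0001.i.2013-01-05-21600.nc" (hypothetical file name):
#   $FILE:r strips the ".nc"         -> case.cam_0001.i.2013-01-05-21600
#   $FILE:e then grabs the extension -> 2013-01-05-21600
# so ATM_DATE = "2013 01 05 21600" and ATM_HOUR = 21600 / 3600 = 6.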

# Move the hidden restart set back into $rundir so that it is processed properly.

${LIST} -d ../Hide*
if ($status == 0) then
echo 'Moving hidden restarts into the run directory so they can be used or purged.'
${MOVE} ../Hide*/* .
rmdir ../Hide*
endif

# We need to know the names of the current cesm.log files - one log file is created
# by each CESM model advance.

set log_list = `${LIST} -t cesm.log.*`

echo "most recent log is $log_list[1]"
echo "oldest log is $log_list[$#log_list]"
echo "entire log list is $log_list"
echo " "

# ==============================================================================
# Block 2: Populate a run-time directory with the input needed to run DART.
# ==============================================================================

echo "`date` -- BEGIN COPY BLOCK"

# Put a pared down copy (no comments) of input.nml in this assimilate_cam directory.
# The contents may change from one cycle to the next, so always start from
# the known configuration in the CASEROOT directory.

if ( -e ${CASEROOT}/input.nml ) then

sed -e "/#/d;/^\!/d;/^[ ]*\!/d" \
-e '1,1i\WARNING: Changes to this file will be ignored. \n Edit \$CASEROOT/input.nml instead.\n\n\n' \
${CASEROOT}/input.nml >! input.nml || exit 10
else
echo "ERROR ... DART required file ${CASEROOT}/input.nml not found ... ERROR"
echo "ERROR ... DART required file ${CASEROOT}/input.nml not found ... ERROR"
exit 11
endif

echo "`date` -- END COPY BLOCK"

# If possible, use the round-robin approach to deal out the tasks.
# This facilitates using multiple nodes for the simultaneous I/O operations.

if ($?NUMTASKS_PERNODE) then
if ($#NUMTASKS_PERNODE > 0) then
${MOVE} input.nml input.nml.$$ || exit 20
sed -e "s#layout.*#layout = 2#" \
-e "s#tasks_per_node.*#tasks_per_node = $NUMTASKS_PERNODE#" \
input.nml.$$ >! input.nml || exit 21
${REMOVE} -f input.nml.$$
endif
endif

# ==============================================================================
# Block 3: Identify requested output stages, warn about redundant output.
# ==============================================================================

set MYSTRING = `grep stages_to_write input.nml`
set MYSTRING = (`echo $MYSTRING | sed -e "s#[=,'\.]# #g"`)
set STAGE_input = FALSE
set STAGE_forecast = FALSE
set STAGE_preassim = FALSE
set STAGE_postassim = FALSE
set STAGE_analysis = FALSE
set STAGE_output = FALSE

# Assemble lists of stages to write out, which are not the 'output' stage.

set stages_except_output = "{"
@ stage = 2
while ($stage <= $#MYSTRING)
if ($MYSTRING[$stage] == 'input') then
set STAGE_input = TRUE
if ($stage > 2) set stages_except_output = "${stages_except_output},"
set stages_except_output = "${stages_except_output}input"
endif
if ($MYSTRING[$stage] == 'forecast') then
set STAGE_forecast = TRUE
if ($stage > 2) set stages_except_output = "${stages_except_output},"
set stages_except_output = "${stages_except_output}forecast"
endif
if ($MYSTRING[$stage] == 'preassim') then
set STAGE_preassim = TRUE
if ($stage > 2) set stages_except_output = "${stages_except_output},"
set stages_except_output = "${stages_except_output}preassim"
endif
if ($MYSTRING[$stage] == 'postassim') then
set STAGE_postassim = TRUE
if ($stage > 2) set stages_except_output = "${stages_except_output},"
set stages_except_output = "${stages_except_output}postassim"
endif
if ($MYSTRING[$stage] == 'analysis') then
set STAGE_analysis = TRUE
if ($stage > 2) set stages_except_output = "${stages_except_output},"
set stages_except_output = "${stages_except_output}analysis"
endif
if ($stage == $#MYSTRING) then
set stages_all = "${stages_except_output}"
if ($MYSTRING[$stage] == 'output') then
set STAGE_output = TRUE
set stages_all = "${stages_all},output"
endif
endif
@ stage++
end

# Add the closing }
set stages_all = "${stages_all}}"
set stages_except_output = "${stages_except_output}}"

# Checking
echo "stages_except_output = $stages_except_output"
echo "stages_all = $stages_all"
if ($STAGE_output != TRUE) then
echo "ERROR: assimilate.csh requires that input.nml:filter_nml:stages_to_write includes stage 'output'"
exit 40
endif
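# Example (illustrative values): with input.nml containing
#   stages_to_write = 'preassim', 'output'
# the loop above yields
#   stages_except_output = {preassim}
#   stages_all           = {preassim,output}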

# ==============================================================================
# Block 4: Preliminary clean up, which can run in the background.
# ==============================================================================
# CESM2_0's new archiver has a mechanism for removing restart file sets,
# which we don't need, but it runs only after the (multicycle) job finishes.
# We'd like to remove unneeded restarts as the job progresses, allowing more
# cycles to run before needing to stop to archive data. So clean them out of
# RUNDIR, and st_archive will never have to deal with them.
# ------------------------------------------------------------------------------

# For safety, leave the most recent *2* restart sets in place.
# Prevents catastrophe if the last restart set is partially written before a crash.
# Add 1 more because the restart set used to start this will be counted:
# there will be 3 restarts when there are only 2 cesm.log files,
# which caused all the files to be deleted.

### edited by xhw ###
# - the following code was added to make controlling the run more convenient
set if_save_initial='true'
if ($if_save_initial == 'true') then
set re_list = `${LIST} -t *cpl_0001.r.*`

set FILE = $re_list[1]
set FILE = $FILE:r
if ($FILE:e == 'nc') set FILE = $FILE:r
set rm_date = $FILE:e
set RM_DATE_PARTS = `echo $rm_date | sed -e "s#-# #g"`

# find the current model time from cpl.log file
set day_o_month = $RM_DATE_PARTS[3]
set sec_o_day = $RM_DATE_PARTS[4]
set day_time = ${day_o_month}-${sec_o_day}


#mkdir -p histfiles
#${COPY} ${CASE}*.{h0,h1}.[0-9]*${day_time}* ./histfiles/ &

# clean {ha2x1h,ha2x1hi,ha2x3h,hr2x,ha2x1h} files of last cycle
if ($#re_list >= 2) then
set FILE_last = $re_list[2]
set FILE_last = $FILE_last:r
if ($FILE_last:e == 'nc') set FILE_last = $FILE_last:r
set rm_date_last = $FILE_last:e
set RM_DATE_PARTS_last = `echo $rm_date_last | sed -e "s#-# #g"`
set day_o_month_last = $RM_DATE_PARTS_last[3]
set sec_o_day_last = $RM_DATE_PARTS_last[4]
set day_time_last = ${day_o_month_last}-${sec_o_day_last}
${REMOVE} ${CASE}*.{ha2x1h,ha2x1hi,ha2x3h,hr2x,ha2x1h}.*${day_time_last}* &
endif

# backup the initial files
if ($sec_o_day =~ '00000') then

### edited by xhw 2024-08-03 ###
mkdir -p initialfiles
${COPY} ${CASE}*.i.[0-9]*${day_time}* ./initialfiles/ &
###--------------------------###
endif

# use changeSDdate.sh under CASEROOT to change the nudging timestamp;
# the nudging timestamp must match the current model datetime
set formatted_month = `printf "%02d" $ATM_MONTH`
set formatted_day = `printf "%02d" $ATM_DAY`
$CASEROOT/changeSDdate.sh $CASEROOT ${ATM_YEAR}${formatted_month}${formatted_day}



# - below is the content of changeSDdate.sh
# caseroot=$1
# newdate=$2

# for dd in $(seq 1 5); do
# filename="user_nl_cam_000$dd"
# # use sed to replace the 8-digit date field in the met_data_file entry
# sed -i "s/\(met_data_file =.*_\)[0-9]\{8\}\(.*\)/\1${newdate}\2/" "$caseroot/$filename"

# # report the result
# if [ $? -eq 0 ]; then
# echo "met_data_file date field in $filename updated to $newdate."
# else
# echo "failed to update met_data_file date field in $filename."
# fi
# done

endif
# --------------------------#

### edited by xhw: changed the threshold to ($#log_list >= 5)
if ($#log_list >= 5) then

# List of potential restart sets to remove. The coupler restart files
# may or may not have an 'instance' string in them, depending on whether
# or not you are using the multi-driver or not, so we must check for both.

set re_list = `${LIST} -t *cpl.r.*`
if ($#re_list == 0) set re_list = `${LIST} -t *cpl_0001.r.*`

### edited by xhw: changed the test to ($#re_list < 5)
if ($#re_list < 5) then
echo "ERROR: Too many cesm.log files ($#log_list) for the $#re_list restart sets."
echo " Clean out the cesm.log files from failed cycles."
exit 50
endif

# Find the date of the oldest restart set from filenames like:
# setup_test.cpl_0001.r.2016-12-11-21600.nc ... or ...
# setup_test.cpl.r.2016-12-11-21600.nc
#
# Grab the root of the filename (removes the .nc 'extension')
# and then the extension is the bit we need.
# Want the YYYY-MM-DD-SSSSS part as well as 'DD-SSSSS'

### edited by xhw ###
# - we want to keep the latest four restart sets in the run directory,
# - so when ($#log_list >= 5) we remove the 5th-newest set
set FILE = $re_list[5]

# --------------- ###
set FILE = $FILE:r
if ($FILE:e == 'nc') set FILE = $FILE:r
set rm_date = $FILE:e
set RM_DATE_PARTS = `echo $rm_date | sed -e "s#-# #g"`
set day_o_month = $RM_DATE_PARTS[3]
set sec_o_day = $RM_DATE_PARTS[4]
set day_time = ${day_o_month}-${sec_o_day}

# Decide whether to purge restart files at this date and time.
# edited by xhw: changed save_rest_freq from 3 to 4
set save_rest_freq = 4

set purge = 'true'
# Learn whether save_rest_freq is a string or a number.
# Character strings must be tested outside of the 'if' statement.
echo $save_rest_freq | grep '[a-z]'
if ($status == 0) then
set purge_date = $RM_DATE_PARTS[1]-$RM_DATE_PARTS[2]-$RM_DATE_PARTS[3]
set weekday = `date --date="$purge_date" +%A`
if ($weekday == $save_rest_freq) set purge = 'false'

# Numbers can be tested inside the 'if' statement.
else if (`echo $save_rest_freq | grep '[0-9]'`) then
if (${day_o_month} % ${save_rest_freq} == 0) set purge = 'false'

endif
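# Example (illustrative): with save_rest_freq = 4, a restart set is preserved
# only when its day of month is a multiple of 4 (e.g. the 04th or 08th);
# a weekday string such as "Sunday" would instead preserve one set per week.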

# Identify log files to be removed or moved.
# [3] means the 3rd oldest restart set is being (re)moved.
set rm_log = `echo $log_list[3] | sed -e "s/\./ /g;"`
set rm_slot = $#rm_log
if ($rm_log[$#rm_log] == 'gz') @ rm_slot--
echo 'oldest restart set is from job tagged $rm_log['$rm_slot']='$rm_log[$rm_slot]

# This first half of the statement removes unwanted restarts.
# The 'else' block preserves the restarts in the archive directory.

### edited by xhw ###
# - run the purge branch below regardless of whether the model time equals "00000"

#if ( $sec_o_day !~ '00000' || \
# ($sec_o_day =~ '00000' && $purge == 'true') ) then
if ( ( $sec_o_day !~ '00000') || ( $sec_o_day =~ '00000') ) then

# --------------- ###
# Optionally save inflation restarts, even if it's not a 'save restart' time.
if ($save_all_inf =~ TRUE) ${MOVE} ${CASE}*inf*${day_time}* ${archive}/esp/hist

# Remove intermediate member restarts,
# but not DART means, sd, obs_seq, inflation restarts output.
# Note that *cpl.h[ar]* are retained, and any h#, #>0.

### edited by xhw ###
# - change {r,rs,rs1,rh0,h0} to {r,rs,rs1,rh0}
# - do not remove history files, and add "set nonomatch"
set nonomatch
echo "Removing unneeded restart file set (DD_SSSSS ${day_time}) from RUNDIR: "
echo " ${CASE}"'*.{r,rs,rs1,rh0}.*'"${day_time}"
${REMOVE} ${CASE}*.{r,rs,rs1,rh0}.*${day_time}*

# ---------------###

# Handle .i. separately to avoid sweeping up .i.${scomp}_{in,out}put_{mean,sd,...} files.
echo " ${CASE}"'*.i.[0-9]*'"${day_time}"
### edited by xhw ###
# - ${REMOVE} here could delete initial and history files we need,
# - so the line below is commented out

#${REMOVE} ${CASE}*.i.[0-9]*${day_time}* &
# --------------- ###
if ($save_stages_freq =~ NONE || $save_stages_freq =~ RESTART_TIMES) then
# 'output' will have been renamed by the time the purging happens.
echo " ${CASE}"'*'[0-9].${stages_except_output}'*'${day_time}

### edited by xhw ###
# - ${REMOVE} here could delete initial and history files we need,
# - so the line below is commented out

# ${REMOVE} ${CASE}.*[0-9].${stages_except_output}*${day_time}* &
# ------------------#
endif
else

echo "Preserving (compressed) restart file set (DD_SSSSS ${day_time})"

# Optionally COPY inflation restarts to the same place as the other inflation restarts.
if ($save_all_inf =~ TRUE) then
${COPY} ${CASE}*inf*${day_time}* ${archive}/esp/hist &
endif

# Optionally REMOVE stages' ensemble members (not means and sds).
if ($save_stages_freq =~ NONE ) then
echo "Removing unneeded stages' ensemble members (DD_SSSSS ${day_time}) from RUNDIR: "
echo " ${CASE}"'*'[0-9].${stages_except_output}'*'${day_time}
${REMOVE} ${CASE}.*[0-9].${stages_except_output}*${day_time}* &
endif

wait

# The list of components determines which restarts are compressed by this call.
# List the large files first (more efficient and reliable).
# There is another call farther down to compress the DART files every cycle.
echo "compress.csh started at `date`"

### edited by xhw 2024-08-03 ###
# - we don't need to compress any files,
# - so the code below is commented out

# ${CASEROOT}/compress.csh $CASE ${rm_date} $ensemble_size "clm2 cpl cam cice" "$stages_all"
#if ($status != 0) then
# echo "compress.csh failed at `date`"
# exit 55
#endif
#echo "compress.csh finished at `date`"
### -------------------------###


# Save the restart set to archive/rest/$datename,
# where it will be safe from removes of $component/rest.
# There is an implicit assumption that some sort of inflation will be used.

set save_root = ${archive}/rest/${rm_date}
if (! -d $save_root) then
mkdir -p $save_root
(${MOVE} ${CASE}*.{r,rs,rs1,rh0,h0}.*${day_time}* $save_root || exit 60) &
(${MOVE} ${CASE}*.i.[0-9]*${day_time}* $save_root || exit 61) &
(${COPY} *.output*inf*${day_time}* $save_root || exit 62) &
(${MOVE} *0001*${rm_log[$rm_slot]}* $save_root || exit 63) &
(${MOVE} cesm*${rm_log[$rm_slot]}* $save_root || exit 64) &
else
echo "WARNING: $save_root already exists. Did st_archive make it?"
# exit 65
endif
endif

# Remove log files: *YYMMDD-HHMMSS*. Except not da.log files
${REMOVE} [^d]*${rm_log[$rm_slot]}* &

# I'd like to remove the CAM .r. files, since we always use the .i. files to do a hybrid start,
# but apparently CESM needs them to be there, even though it doesn't read fields from them.
# ${REMOVE} ${CASE}.cam*.r.*${day_time}.nc &

endif

# ==============================================================================
# Block 5: Get observation sequence file ... or die right away.
# The observation file names have a time that matches the stopping time of CAM.
#
# Make sure the file name structure matches the obs you will be using.
# PERFECT model obs output appends .perfect to the filenames
# ==============================================================================

set YYYYMM = `printf %04d%02d ${ATM_YEAR} ${ATM_MONTH}`

if (! -d ${BASEOBSDIR}/${YYYYMM}_6H_CESM) then
echo "CESM+DART requires 6 hourly obs_seq files in directories of the form YYYYMM_6H_CESM"
echo "The directory ${BASEOBSDIR}/${YYYYMM}_6H_CESM is not found. Exiting"
exit 70
endif

set OBSFNAME = `printf obs_seq.%04d-%02d-%02d-%05d ${ATM_YEAR} ${ATM_MONTH} ${ATM_DAY} ${ATM_SECONDS}`

set OBS_FILE = ${BASEOBSDIR}/${YYYYMM}_6H_CESM/${OBSFNAME}
echo "OBS_FILE = $OBS_FILE"
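# Example (illustrative): for a model valid time of 2013-01-05 06Z this expects
#   ${BASEOBSDIR}/201301_6H_CESM/obs_seq.2013-01-05-21600
# i.e. the obs_seq files must be named with the time in seconds-of-day.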

${REMOVE} obs_seq.out
if ( -e ${OBS_FILE} ) then
${LINK} ${OBS_FILE} obs_seq.out || exit 80
else
echo "ERROR ... no observation file ${OBS_FILE}"
echo "ERROR ... no observation file ${OBS_FILE}"
exit 81
endif

# ==============================================================================
# Block 6: DART INFLATION
# This block is only relevant if 'inflation' is turned on AND
# inflation values change through time:
# filter_nml
# inf_flavor(:) = 2 (or 3 (or 4 for posterior))
# inf_initial_from_restart = .TRUE.
# inf_sd_initial_from_restart = .TRUE.
#
# This block stages the files that contain the inflation values.
# The inflation files are essentially duplicates of the DART model state,
# which have names in the CESM style, something like
# ${case}.dart.rh.${scomp}_output_priorinf_{mean,sd}.YYYY-MM-DD-SSSSS.nc
# The strategy is to use the latest such files in ${RUNDIR}.
# If those don't exist at the start of an assimilation,
# this block creates them with 'fill_inflation_restart'.
# If they don't exist AFTER the first cycle, the script will exit
# because they should have been available from a previous cycle.
# The script does NOT check the model date of the files for consistency
# with the current forecast time, so check that the inflation mean
# files are evolving as expected.
#
# CESM's st_archive should archive the inflation restart files
# like any other "restart history" (.rh.) files; copying the latest files
# to the archive directory, and moving all of the older ones.
# ==============================================================================

# If we need to run fill_inflation_restart, CAM:static_init_model()
# always needs a caminput.nc and a cam_phis.nc for geometry information, etc.

set MYSTRING = `grep cam_template_filename input.nml`
set MYSTRING = `echo $MYSTRING | sed -e "s#[=,']# #g"`
set CAMINPUT = $MYSTRING[2]
${REMOVE} ${CAMINPUT}
${LINK} ${CASE}.cam_0001.i.${ATM_DATE_EXT}.nc ${CAMINPUT} || exit 90

# All of the .h0. files contain the same PHIS field, so we can link to any of them.
set MYSTRING = `grep cam_phis_filename input.nml`
set MYSTRING = `echo $MYSTRING | sed -e "s#[=,']# #g"`
set CAM_PHIS = $MYSTRING[2]
# Listings are slow in productions runs; only do it if necessary
if (! -f ${CAM_PHIS}) then
set hists = `${LIST} ${CASE}.cam_0001.h0.*.nc`
${COPY} $hists[1] ${CAM_PHIS} || exit 100
endif

# Now, actually check the inflation settings

set MYSTRING = `grep inf_flavor input.nml`
set MYSTRING = `echo $MYSTRING | sed -e "s#[=,'\.]# #g"`
set PRIOR_INF = $MYSTRING[2]
set POSTE_INF = $MYSTRING[3]

set MYSTRING = `grep inf_initial_from_restart input.nml`
set MYSTRING = `echo $MYSTRING | sed -e "s#[=,'\.]# #g"`

# If no inflation is requested, the inflation restart source is ignored

if ( $PRIOR_INF == 0 ) then
set PRIOR_INFLATION_FROM_RESTART = ignored
set USING_PRIOR_INFLATION = false
else
set PRIOR_INFLATION_FROM_RESTART = `echo $MYSTRING[2] | tr '[:upper:]' '[:lower:]'`
set USING_PRIOR_INFLATION = true
endif

if ( $POSTE_INF == 0 ) then
set POSTE_INFLATION_FROM_RESTART = ignored
set USING_POSTE_INFLATION = false
else
set POSTE_INFLATION_FROM_RESTART = `echo $MYSTRING[3] | tr '[:upper:]' '[:lower:]'`
set USING_POSTE_INFLATION = true
endif

if ($USING_PRIOR_INFLATION == false ) then
set stages_requested = 0
if ( $STAGE_input == TRUE ) @ stages_requested++
if ( $STAGE_forecast == TRUE ) @ stages_requested++
if ( $STAGE_preassim == TRUE ) @ stages_requested++
if ( $stages_requested > 1 ) then
echo " "
echo "WARNING ! ! Redundant output is requested at multiple stages before assimilation."
echo " Stages 'input' and 'forecast' are always redundant."
echo " Prior inflation is OFF, so stage 'preassim' is also redundant. "
echo " We recommend requesting just 'preassim'."
echo " "
endif
endif

if ($USING_POSTE_INFLATION == false ) then
set stages_requested = 0
if ( $STAGE_postassim == TRUE ) @ stages_requested++
if ( $STAGE_analysis == TRUE ) @ stages_requested++
if ( $STAGE_output == TRUE ) @ stages_requested++
if ( $stages_requested > 1 ) then
echo " "
echo "WARNING ! ! Redundant output is requested at multiple stages after assimilation."
echo " Stages 'output' and 'analysis' are always redundant."
echo " Posterior inflation is OFF, so stage 'postassim' is also redundant. "
echo " We recommend requesting just 'output'."
echo " "
endif
endif

# IFF we want PRIOR inflation:

if ($USING_PRIOR_INFLATION == true) then
if ($PRIOR_INFLATION_FROM_RESTART == false) then

echo "inf_flavor(1) = $PRIOR_INF, using namelist values."

else
# Look for the output from the previous assimilation (or fill_inflation_restart)
# If inflation files exists, use them as input for this assimilation
(${LIST} -rt1 *.dart.rh.${scomp}_output_priorinf_mean* | tail -n 1 >! latestfile) > & /dev/null
(${LIST} -rt1 *.dart.rh.${scomp}_output_priorinf_sd* | tail -n 1 >> latestfile) > & /dev/null
set nfiles = `cat latestfile | wc -l`

if ( $nfiles > 0 ) then

set latest_mean = `head -n 1 latestfile`
set latest_sd = `tail -n 1 latestfile`
# Need to COPY instead of link because of short-term archiver and disk management.
${COPY} $latest_mean input_priorinf_mean.nc
${COPY} $latest_sd input_priorinf_sd.nc

else if ($CONT_RUN == FALSE) then

# It's the first assimilation; try to find some inflation restart files
# or make them using fill_inflation_restart.
# Fill_inflation_restart needs caminput.nc and cam_phis.nc for static_model_init,
# so this staging is done in assimilate.csh (after a forecast) instead of stage_cesm_files.

if (-x ${EXEROOT}/fill_inflation_restart) then

${EXEROOT}/fill_inflation_restart

else
echo "ERROR: Requested PRIOR inflation restart for the first cycle."
echo " There are no existing inflation files available "
echo " and ${EXEROOT}/fill_inflation_restart is missing."
echo "EXITING"
exit 112
endif

else
echo "ERROR: Requested PRIOR inflation restart, "
echo ' but files *.dart.rh.${scomp}_output_priorinf_* do not exist in the ${RUNDIR}.'
echo ' If you are changing from cam_no_assimilate.csh to assimilate.csh,'
echo ' you might be able to continue by changing CONTINUE_RUN = FALSE for this cycle,'
echo ' and restaging the initial ensemble.'
${LIST} -l *inf*
echo "EXITING"
exit 115
endif
endif
else
echo "Prior Inflation not requested for this assimilation."
endif

# POSTERIOR: We look for the 'newest' and use it - IFF we need it.

if ($USING_POSTE_INFLATION == true) then
if ($POSTE_INFLATION_FROM_RESTART == false) then

# we are not using an existing inflation file.
echo "inf_flavor(2) = $POSTE_INF, using namelist values."

else
# Look for the output from the previous assimilation (or fill_inflation_restart).
# (The only stage after posterior inflation.)
(${LIST} -rt1 *.dart.rh.${scomp}_output_postinf_mean* | tail -n 1 >! latestfile) > & /dev/null
(${LIST} -rt1 *.dart.rh.${scomp}_output_postinf_sd* | tail -n 1 >> latestfile) > & /dev/null
set nfiles = `cat latestfile | wc -l`

# If one exists, use it as input for this assimilation
if ( $nfiles > 0 ) then

set latest_mean = `head -n 1 latestfile`
set latest_sd = `tail -n 1 latestfile`
${LINK} $latest_mean input_postinf_mean.nc || exit 120
${LINK} $latest_sd input_postinf_sd.nc || exit 121

else if ($CONT_RUN == FALSE) then
# It's the first assimilation; try to find some inflation restart files
# or make them using fill_inflation_restart.
# Fill_inflation_restart needs caminput.nc and cam_phis.nc for static_model_init,
# so this staging is done in assimilate.csh (after a forecast) instead of stage_cesm_files.

if (-x ${EXEROOT}/fill_inflation_restart) then
${EXEROOT}/fill_inflation_restart
${MOVE} prior_inflation_mean.nc input_postinf_mean.nc || exit 125
${MOVE} prior_inflation_sd.nc input_postinf_sd.nc || exit 126

else
echo "ERROR: Requested POSTERIOR inflation restart for the first cycle."
echo " There are no existing inflation files available "
echo " and ${EXEROOT}/fill_inflation_restart is missing."
echo "EXITING"
exit 127
endif

else
echo "ERROR: Requested POSTERIOR inflation restart, "
echo ' but files *.dart.rh.${scomp}_output_postinf_* do not exist in the ${RUNDIR}.'
${LIST} -l *inf*
echo "EXITING"
exit 128
endif
endif
else
echo "Posterior Inflation not requested for this assimilation."
endif

# ==============================================================================
# Block 7: Actually run the assimilation.
#
# DART namelist settings required:
# &filter_nml
# adv_ens_command = "no_CESM_advance_script",
# obs_sequence_in_name = 'obs_seq.out'
# obs_sequence_out_name = 'obs_seq.final'
# single_file_in = .false.,
# single_file_out = .false.,
# stages_to_write = stages you want + ,'output'
# input_state_file_list = 'cam_init_files'
# output_state_file_list = 'cam_init_files',
#
# WARNING: the default mode of this script assumes that
# input_state_file_list = output_state_file_list, so that
# the CAM initial files used as input to filter will be overwritten.
# The input model states can be preserved by requesting that stage
# 'forecast' be output.
#
# ==============================================================================

# In the default mode of CAM assimilations, filter gets the model state(s)
# from CAM initial files. This section puts the names of those files into a text file.
# The name of the text file is provided to filter in filter_nml:input_state_file_list.

# NOTE:
# If the files in input_state_file_list are CESM initial files (all vars and
# all meta data), then they will end up with a different structure than
# the non-'output', stage output written by filter ('preassim', 'postassim', etc.).
# This can be prevented (at the cost of more disk space) by copying
# the CESM format initial files into the names filter will use for preassim, etc.:
# > cp $case.cam_0001.i.$date.nc preassim_member_0001.nc.
# > ... for all members
# Filter will replace the state variables in preassim_member* with updated versions,
# but leave the other variables and all metadata unchanged.

# If filter will create an ensemble from a single state,
# filter_nml: perturb_from_single_instance = .true.
# it's fine (and convenient) to put the whole list of files in input_state_file_list.
# Filter will just use the first as the base to perturb.

set line = `grep input_state_file_list input.nml | sed -e "s#[=,'\.]# #g"`
set input_file_list_name = $line[2]

# If the file names in $output_state_file_list = names in $input_state_file_list,
# then the restart file contents will be overwritten with the states updated by DART.

set line = `grep output_state_file_list input.nml | sed -e "s#[=,'\.]# #g"`
set output_file_list_name = $line[2]

if ($input_file_list_name != $output_file_list_name) then
echo "ERROR: assimilate.csh requires that input_file_list = output_file_list"
echo " You can probably find the data you want in stage 'forecast'."
echo " If you truly require separate copies of CAM's initial files"
echo " before and after the assimilation, see revision 12603, and note that"
echo " it requires changing the linking to cam_initial_####.nc, below."
exit 130
endif

${LIST} -1 ${CASE}.cam_[0-9][0-9][0-9][0-9].i.${ATM_DATE_EXT}.nc >! $input_file_list_name

echo "`date` -- BEGIN FILTER"

### edited by xhw ###
# change ${LAUNCHCMD} to mpirun -np 64:
# filter can hang if too many cores are used; 64 cores work reliably

# ${LAUNCHCMD} ${EXEROOT}/filter || exit 140
mpirun -np 64 ${EXEROOT}/filter || exit 140
###---------------###


echo "`date` -- END FILTER"

# ==============================================================================
# Block 8: Rename the output using the CESM file-naming convention.
# ==============================================================================

# If output_state_file_list is filled with custom (CESM) filenames,
# then 'output' ensemble members will not appear with filter's default,
# hard-wired names. But file types output_{mean,sd} will appear and be
# renamed here.
#
# We don't know the exact set of files which will be written,
# so loop over all possibilities: use LIST in the foreach.
# LIST will expand the variables and wildcards, only existing files will be
# in the foreach loop. (If the input.nml has num_output_state_members = 0,
# there will be no output_member_xxxx.nc even though the 'output' stage
# may be requested - for the mean and sd)
#
# Handle files with instance numbers first.
# split off the .nc
# separate the pieces of the remainder
# grab all but the trailing 'member' and #### parts.
# and join them back together

echo "`date` -- BEGIN FILE RENAMING"

# The short-term archiver archives files depending on pieces of their names.
# '_####.i.' files are CESM initial files.
# '.dart.i.' files are ensemble statistics (mean, sd) of just the state variables
# in the initial files.
# '.e.' designates a file as something from the 'external system processing ESP', e.g. DART.

foreach FILE (`${LIST} ${stages_all}_member_*.nc`)

set parts = `echo $FILE | sed -e "s#\.# #g"`
set list = `echo $parts[1] | sed -e "s#_# #g"`
@ last = $#list - 2
set dart_file = `echo $list[1-$last] | sed -e "s# #_#g"`

# DART 'output_member_****.nc' files are actually linked to cam input files

set type = "e"
echo $FILE | grep "put"
if ($status == 0) set type = "i"

${MOVE} $FILE \
${CASE}.${scomp}_$list[$#list].${type}.${dart_file}.${ATM_DATE_EXT}.nc || exit 150
end

# Files without instance numbers need to have the scomp part of their names = "dart".
# This is because in st_archive, all files with scomp = "cam"
# (= compname in env_archive.xml) will be st_archived using a pattern
# which has the instance number added onto it. {mean,sd} files don't have
# instance numbers, so they need to be archived by the "dart" section of env_archive.xml.
# But they still need to be different for each component, so include $scomp in the
# ".dart_file" part of the file name. Somewhat awkward and inconsistent, but effective.

# Means and standard deviation files (except for inflation).
foreach FILE (`${LIST} ${stages_all}_{mean,sd}*.nc`)

set parts = `echo $FILE | sed -e "s#\.# #g"`
set type = "e"
echo $FILE | grep "put"
if ($status == 0) set type = "i"

${MOVE} $FILE ${CASE}.dart.${type}.${scomp}_$parts[1].${ATM_DATE_EXT}.nc || exit 160
end

# Rename the observation file and run-time output

${MOVE} obs_seq.final ${CASE}.dart.e.${scomp}_obs_seq_final.${ATM_DATE_EXT} || exit 170
${MOVE} dart_log.out ${scomp}_dart_log.${ATM_DATE_EXT}.out || exit 171

# Rename the inflation files and designate them as 'rh' files - which get
# reinstated in the run directory by the short-term archiver and are then
# available for the next assimilation cycle.
#
# Accommodate any possible inflation files.
# The .${scomp}_ part is needed by DART to distinguish
# between inflation files from separate components in coupled assims.

foreach FILE (`${LIST} ${stages_all}_{prior,post}inf_*`)

set parts = `echo $FILE | sed -e "s#\.# #g"`
${MOVE} $FILE ${CASE}.dart.rh.${scomp}_$parts[1].${ATM_DATE_EXT}.nc || exit 180

end

# Handle localization_diagnostics_files
set MYSTRING = `grep 'localization_diagnostics_file' input.nml`
set MYSTRING = `echo $MYSTRING | sed -e "s#[=,']# #g"`
set MYSTRING = `echo $MYSTRING | sed -e 's#"# #g'`
set loc_diag = $MYSTRING[2]
if (-f $loc_diag) then
${MOVE} $loc_diag ${scomp}_${loc_diag}.dart.e.${ATM_DATE_EXT} || exit 190
endif

# Handle regression diagnostics
set MYSTRING = `grep 'reg_diagnostics_file' input.nml`
set MYSTRING = `echo $MYSTRING | sed -e "s#[=,']# #g"`
set MYSTRING = `echo $MYSTRING | sed -e 's#"# #g'`
set reg_diag = $MYSTRING[2]
if (-f $reg_diag) then
${MOVE} $reg_diag ${scomp}_${reg_diag}.dart.e.${ATM_DATE_EXT} || exit 200
endif

# Then this script will need to feed the files in output_restart_list_file
# to the next model advance.
# This gets the .i. or .r. piece from the CESM format file name.
set line = `grep 0001 $output_file_list_name | sed -e "s#[\.]# #g"`
set l = 1
while ($l < $#line)
if ($line[$l] =~ ${scomp}_0001) then
@ l++
set file_type = $line[$l]
break
endif
@ l++
end

set member = 1
while ( ${member} <= ${ensemble_size} )

set inst_string = `printf _%04d $member`
set ATM_INITIAL_FILENAME = ${CASE}.${scomp}${inst_string}.${file_type}.${ATM_DATE_EXT}.nc

${REMOVE} ${scomp}_initial${inst_string}.nc
${LINK} $ATM_INITIAL_FILENAME ${scomp}_initial${inst_string}.nc || exit 210

@ member++

end

echo "`date` -- END FILE RENAMING"

if ($cycle == $DATA_ASSIMILATION_CYCLES) then
echo "`date` -- BEGIN (NON-RESTART) ARCHIVING LOGIC"

if ($#log_list >= 3) then

# During the last cycle, hide the previous restart set
# so that it's not archived, but is available.
# (Coupled assimilations may need to keep multiple atmospheric
# cycles for each ocean cycle.)

set FILE = $re_list[2]
set FILE = $FILE:r
if ($FILE:e == 'nc') set FILE = $FILE:r
set hide_date = $FILE:e
set HIDE_DATE_PARTS = `echo $hide_date | sed -e "s#-# #g"`
set day_o_month = $HIDE_DATE_PARTS[3]
set sec_o_day = $HIDE_DATE_PARTS[4]
set day_time = ${day_o_month}-${sec_o_day}

set hidedir = ../Hide_${day_time}
mkdir $hidedir

if ($save_all_inf =~ TRUE) then
# Put the previous and current inflation restarts in the archive directory.
# (to protect last from st_archive putting them in exp/hist)
${MOVE} ${CASE}*${stages_except_output}*inf* ${archive}/esp/rest

# Don't need previous inf restarts now, but want them to be archived later.
# COPY instead of LINK because they'll be moved or used later.
${COPY} ${CASE}*output*inf* ${archive}/esp/rest
else
# output*inf must be copied back because it needs to be in ${RUNDIR}
# when st_archive runs to save the results of the following assim
${MOVE} ${CASE}*inf*${day_time}* $hidedir

# Don't need previous inf restarts now, but want them to be archived later.
${COPY} $hidedir/${CASE}*output*inf*${day_time}* .
endif

# Hide the CAM 'restart' files from the previous cycle (day_time) from the archiver.
### edited by xhw ###
# change ${MOVE} to ${COPY}
# ${MOVE} will make us lose our historical files
# ${MOVE} ${CASE}*.{r,rs,rs1,rh0,h0,i}.*${day_time}*
${COPY} ${CASE}*.{r,rs,rs1,rh0,h0,i}.*${day_time}* $hidedir
###---------------###

# Move log files: *YYMMDD-HHMMSS. [2] means the previous restart set is being moved.
set rm_log = `echo $log_list[2] | sed -e "s/\./ /g;"`
# -- (decrement by one) skips the gz at the end of the names.
set rm_slot = $#rm_log
if ($rm_log[$#rm_log] =~ gz) @ rm_slot--
${MOVE} *$rm_log[$rm_slot]* $hidedir
endif

# Restore CESM's timing logic for the first cycle of the next job.
cd ${CASEROOT}
./xmlchange CHECK_TIMING=${CHECK_TIMING}
cd ${RUNDIR}

# Create a netCDF file which contains the names of DART inflation restart files.
# This is needed in order to use the CESM st_archive mechanisms for keeping,
# in $RUNDIR, history files which are needed for restarts.
# These special files must be labeled with '.rh.'.
# St_archive looks in a .r. restart file for the names of these 'restart history' files.
# DART's inflation files fit the definition of restart history files, so we put .rh.
# in their names. Those file names must be found in a dart.r. file, which is created here.
# Inflation restart file names for all components will be in this one restart file,
# since the inflation restart files have the component names in them.

set inf_list = `ls *output_{prior,post}inf_*.${ATM_DATE_EXT}.nc`
set file_list = 'restart_hist = "./'$inf_list[1]\"
set i = 2
while ($i <= $#inf_list)
set file_list = (${file_list}\, \"./$inf_list[$i]\")
@ i++
end
cat << ___EndOfText >! inf_restart_list.cdl
netcdf template { // CDL file which ncgen will use to make a DART restart file
// containing just the names of the needed inflation restart files.
dimensions:
num_files = $#inf_list;
variables:
string restart_hist(num_files);
restart_hist:long_name = "DART restart history file names";
data:
$file_list;
}
___EndOfText

ncgen -k netCDF-4 -o ${CASE}.dart.r.${scomp}.${ATM_DATE_EXT}.nc inf_restart_list.cdl
if ($status == 0) ${REMOVE} inf_restart_list.cdl

echo "`date` -- END ARCHIVING LOGIC"

# DEBUG st_archive by making a shadow copy of this directory.
module load nco

if (-d ../run_shadow) ${REMOVE} -f ../run_shadow
mkdir ../run_shadow

set comps = ('cam_' 'clm2_' 'mosart_' 'dart')
set vars = ('nhfil' 'locfnh' 'locfnh' 'restart_hist')

foreach f (`$LIST[1]`)
set gr_stat = 1
echo $f | grep '\.r\.'
if ($status == 0) then
set c = 1
while ($c <= $#comps)
echo $f | grep $comps[$c]
if ($status == 0) then
echo "c = $c for $f"
# set echo verbose
set gr_stat = 0
ncks -O -v $vars[$c] $f ../run_shadow/$f
break
endif
@ c++
end
endif
if ($gr_stat == 1) then
${LIST} -l $f >! ../run_shadow/$f
endif
end

endif

# ==============================================================================
# Compress the large coupler history files and DART files.
# ==============================================================================

echo "STARTING: compressing coupler history files and DART files at `date`"

${CASEROOT}/compress.csh $CASE $ATM_DATE_EXT $ensemble_size "hist dart" "$stages_all"
if ($status != 0) then
echo "ERROR: Compression of coupler history files and DART files failed at `date`"
# Ensure the removal of unneeded restart sets and copy of obs_seq.final are finished.
wait
exit 250
endif

echo "FINISHED: compressing coupler history files and DART files at `date`"
echo "`date` -- END CAM_ASSIMILATE"

# Ensure the removal of unneeded restart sets and copy of obs_seq.final are finished.
wait

exit 0

# <next few lines under version control, do not edit>
# $URL$
# $Revision$
# $Date$


compress.csh

compress.csh is a helper that assimilation.csh calls automatically. It compresses and bundles the files DART produces so they are easier to archive later, but it has a few small bugs that need fixing.

Around line 171, make the following change:

# foreach type ( ha2x1d hr2x ha2x3h ha2x1h ha2x1hi )
foreach type (hr2x ha2x3h ha2x1h ha2x1hi )

The hr2x, ha2x3h, ha2x1h, and ha2x1hi streams in this line are coupler (cpl) outputs; they must be enabled by adding the following to user_nl_cpl:

histaux_a2x1hr = .true.
histaux_a2x3hr = .true.
histaux_a2x1hri = .true.
histaux_a2x24hr = .true.
histaux_r2x = .true.

ha2x1d does not seem to exist in CESM 2.1.x; I removed it and this appears to have no side effects.

When compressing the hr2x, ha2x3h, ha2x1h, and ha2x1hi files, the script first constructs their full file names, assuming they end in year-month-day-seconds.nc. The coupler actually writes them ending in year-month-day.nc, so the script cannot find the files and aborts. I work around this by renaming the files to the year-month-day-seconds.nc form.

After while ( $i <= $ensemble_size), make the following change:

# set lastfilename = `printf "%s.cpl_%04d.%s.%s.nc%s" $case_name $i $type $ymds $ext`

set ymd_self = `echo $ymds | cut -d'-' -f1-3`
set lastfilename = `printf "%s.cpl_%04d.%s.%s.nc%s" $case_name $i $type $ymd_self $ext`
set newfilename = `printf "%s.cpl_%04d.%s.%s.nc%s" $case_name $i $type $ymds $ext`
mv $lastfilename $newfilename

One more change: mpiexec_mpt does not exist on this system, so run the command file directly with sh:

if ($task > 0) then
# mpiexec_mpt -n $task launch_cf.sh ./mycmdfile
sh ./mycmdfile

There are a few other tweaks as well; the complete compress.csh is listed below.

#!/bin/tcsh
#
# DART software - Copyright UCAR. This open source software is provided
# by UCAR, "as is", without charge, subject to all terms of use at
# http://www.image.ucar.edu/DAReS/DART/DART_download
#
#PBS -N compress.csh
#PBS -A P86850054
#PBS -q premium
# For restarts:
# #PBS -l select=9:ncpus=36:mpiprocs=36
# For hist: 6 * 80 = 480
# For dart: 1 + 2*(2 + 80) = 165
# 645 / 36 = 18
# For rest: 4 * 80 = 320 / 36 = 9
#PBS -l select=18:ncpus=36:mpiprocs=36
#PBS -l walltime=00:20:00
#PBS -o compress.out
#PBS -j oe

# ------------------------------------------------------------------------------
# Purpose:
#
# compresses or uncompresses sets of files from a forecast or assimilation.
#
# ------------------------------------------------------------------------------
#
# Method:
#
# When called from a script (normally assimilate.csh), it compresses the files.
# When submitted as a batch job, it can also uncompress sets of files -
# IF this script has been configured to use the right metadata to
# construct the expected data directories.
#
# The strategy is to create a cmdfile that contains a separate task on each line
# and dispatch that cmdfile to perform N simultaneous operations. That cmdfile
# has a syntax ( &> ) to put stderr and stdout in a single file.
# PBS requires 'setenv MPI_SHEPHERD true' for the cmdfile to work correctly.
#
# Compression method can depend on the file type, and in the future may include
# lossy compression. This script is most often called by assimilate.csh, but can
# be run as a batch job.
#
# Assimilate.csh runs this in 2 places:
# 1) Every cycle:
# + all the cpl history (forcing) files.
# + DART output
# > stages of state files
# mean, sd (no instance number)
# > obs_seq.final (no instance number)
# > Note: inflation files are never compressed.
# 2) Before archiving a restart set to archive/rest; all large restart files.
# ------------------------------------------------------------------------------

if ($#argv == 5) then
# Called from assimilate.csh (or other script).
set comp_cmd = 'gzip'
set case_name = $1
set ymds = $2
set ensemble_size = $3
set sets = ($4)
set stages = ($5)
set data_dir = '.'

else if ($#argv == 0) then
# Edit these and run as a batch job.
# 'sets' performs better when ordered by decreasing size (clm2 cpl cam cice hist dart)
set comp_cmd = 'gunzip'
set case_name = CESM2_1_80_3node
set ymds = 2010-07-17-64800
set ensemble_size = 80
set sets = (hist dart)
# set sets = (clm2 cpl cam cice)
set stages = (preassim output)
# set data_dir = /glade/scratch/${USER}/${case_name}/archive/rest/${ymds}
set data_dir = /glade/scratch/${USER}/${case_name}/run

else
echo "Usage: call with exactly 5 arguments or submit as a batch job with 0 arguments:"
echo ' ${scr_dir}/compress.csh case_name YYYY-MM-DD-SSSS ensemble_size "sets" "stages"'
echo ' where '
echo ' sets = 1 or more of {clm2 cpl cam cice hist dart} to compress, separated by spaces'
echo ' stages = 1 or more of stages {input, preassim, postassim, output} to compress.'
echo ' -OR-'
echo " edit compress.csh ; qsub compress.csh"
exit 17

endif

set cmd = `echo $comp_cmd | cut -d' ' -f1`
if ($cmd == 'gzip') then
set ext = ''
else if ($cmd == 'gunzip') then
set ext = '.gz'
else
echo "ERROR: unrecognized command $cmd. Don't know which extension to use"
exit 27
endif

echo "In compress.csh:"
echo " comp_cmd = $comp_cmd"
echo " case_name = $case_name"
echo " date = $ymds"
echo " ensemble_size = $ensemble_size"
echo " sets = $sets"
echo " stages = $stages"
echo " data dir = $data_dir"

cd $data_dir

# ------------------------------------------------------------------------------
# Fail if there are leftover error logs from previous compress.csh executions.

ls *.eo > /dev/null
if ($status == 0) then
echo "ERROR; Existing compression log files: *.eo. Exiting"
exit 37
endif

# --------------------------
# Environment and commands.


setenv MPI_SHEPHERD true

setenv date 'date --rfc-3339=ns'

# ------------------------------------------------------------------------------
# Create the command file where each line is a separate command, task, operation, ....

\rm -f mycmdfile
touch mycmdfile

# 'task' is incremented continuously over all files; components, members, etc.
# 'task' is a running counter of jobs in mycmdfile.
set task = 0

foreach comp ( $sets )
echo "comp = $comp"
switch ($comp)
# FIXME ... the coupler files may or may not have an instance number in them.
case {clm2,cpl,cam,cice}:
set i=1
while ( $i <= $ensemble_size)
# E.g. CAM6_80mem.cice_0001.r.2010-07-15-00000.nc
set file_name = `printf "%s.%s_%04d.r.%s.nc%s" $case_name $comp $i $ymds $ext`
# echo " $file_name"

# If the expected file exists, add the compression command
if (-f $file_name) then
@ task++
echo "$comp_cmd $file_name &> compress_${task}.eo " >> mycmdfile
# Kluge to get around situations where an earlier job compressed the file,
# but failed for some other reason, so it's being re-run.
else if (-f ${file_name}.gz) then
echo "$file_name already compressed"
else
echo 'ERROR: Could not find "'$file_name'" to compress.'
exit 47
endif

@ i++
end
breaksw

case hist:
# Coupler history (forcing) files, ordered by decreasing size
# ha is not a necessary forcing file. The others can do the job
# and are much smaller.
# foreach type ( ha2x1d hr2x ha2x3h ha2x1h ha2x1hi )
foreach type (hr2x ha2x3h ha2x1h ha2x1hi )

# Loop over instance number
set i=1
while ( $i <= $ensemble_size)
# E.g. CAM6_80mem.cpl_0001.ha.2010-07-15-00000.nc
# set lastfilename = `printf "%s.cpl_%04d.%s.%s.nc%s" $case_name $i $type $ymds $ext`
#
set ymd_self = `echo $ymds | cut -d'-' -f1-3`
set lastfilename = `printf "%s.cpl_%04d.%s.%s.nc%s" $case_name $i $type $ymd_self $ext`
set newfilename = `printf "%s.cpl_%04d.%s.%s.nc%s" $case_name $i $type $ymds $ext`
mv $lastfilename $newfilename

set file_name = `printf "%s.cpl_%04d.%s.%s.nc%s" $case_name $i $type $ymds $ext`

if (-f $file_name) then
@ task++
echo "$comp_cmd $file_name &> compress_${task}.eo" >> mycmdfile
else if (-f ${file_name}.gz) then
echo "$file_name already compressed"
else
echo 'ERROR: Could not find "'$file_name'" to compress.'
echo "exit is prevented by xhw"
# exit 57
endif

@ i++
end
end
breaksw

case dart:
# It is not worthwhile to compress inflation files ... small, not many files
# It is also not clear that binary observation sequence files compress effectively.

# obs_seq.final (no inst) 70% of 1 Gb (ascii) in 35 sec
# E.g. CAM6_80mem.dart.e.cam_obs_seq_final.2010-07-15-00000
set file_name = ${case_name}.dart.e.cam_obs_seq_final.${ymds}${ext}
if (-f $file_name) then
@ task++
echo "$comp_cmd $file_name &> compress_${task}.eo" >> mycmdfile
endif

foreach stage ($stages)
foreach stat ( 'mean' 'sd' )
# E.g. CAM6_80mem.e.cam_output_mean.2010-07-15-00000.nc
# E.g. CAM6_80mem.e.cam_output_sd.2010-07-15-00000.nc
set file_name = ${case_name}.dart.e.cam_${stage}_${stat}.${ymds}.nc${ext}
if (-f $file_name) then
@ task++
echo "$comp_cmd $file_name &> compress_${task}.eo" >> mycmdfile
endif
end

# Loop over instance number
set i=1
while ( $i <= $ensemble_size)
# E.g. CAM6_80mem.cam_0001.e.preassim.2010-07-15-00000.nc
set file_name = `printf "%s.cam_%04d.e.%s.%s.nc%s" $case_name $i $stage $ymds $ext`
if (-f $file_name) then
@ task++
echo "$comp_cmd $file_name &> compress_${task}.eo" >> mycmdfile
endif
@ i++
end
end
breaksw

default:
breaksw
endsw
end

# ------------------------------------------------------------------------------

echo "Before launching mycmdfile"

$date

# CHECKME ... make sure $task is <= the number of MPI tasks in this job.

if ($task > 0) then
# mpiexec_mpt -n $task launch_cf.sh ./mycmdfile
sh ./mycmdfile

set mpt_status = $status
echo "mpt_status = $mpt_status"
else
echo "No compression to do"
exit 0
endif

# Check the statuses?
if ( -f compress_1.eo ) then
grep $cmd *.eo
# grep failure = compression success = "not 0"
set gr_stat = $status
# echo "gr_stat when eo file exists = $gr_stat"
else
# No eo files = failure of something besides g(un)zip.
set gr_stat = 0
# echo "gr_stat when eo file does not exist = $gr_stat"
endif

if ($gr_stat == 0) then
echo "compression failed. See .eo files with non-0 sizes"
echo "Remove .eo files after failure is resolved."
exit 197
else
# Compression worked; clean up the .eo files and mycmdfile
\rm -f *.eo mycmdfile
endif

exit 0



About FWSD and FWHIST

DART currently cannot assimilate compsets that contain an ice-sheet (ism) component. FWSD is created with COMP_GLC: sglc (a stub glacier) by default, so FWSD works with DART directly. If you also want to avoid nudging, one option is to lower max_rlx_bottom and max_rlx_top in FWSD, e.g. max_rlx_bottom=1, max_rlx_top=2 (preferably not fractional values below 1, which cause errors). This amounts to almost no nudging at all.

The alternative is to create FWHIST, but FWHIST's default glacier component is cism, and I do not yet know how to adjust compset combinations, so FWHIST is unusable for me for now.

When using FWSD, met_data must be updated frequently. For example, if I start the run at 2012-12-01-00000 with a met_data timestamp of 2012-12-01, the model finds the matching met_data on its own when it reaches 2012-12-02. But once it reaches 2012-12-03, the model can no longer find the met_data. At that point you must manually (or via a script added to assimilation.csh) change met_data in user_nl_cam to 2012-12-03. In other words, the model only finds the correct met_data within a two-day window.
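Rather than editing user_nl_cam by hand every two days, the update can be scripted. Below is a minimal Python sketch, assuming the nudging file is referenced through a met_data_file entry whose quoted path contains a YYYY-MM-DD stamp (the variable name and path layout are assumptions; adapt them to your setup):

```python
import re
from datetime import date

def update_met_data(nl_path, new_date):
    """Point the met_data_file entry in user_nl_cam at new_date's file.

    Assumes a line like: met_data_file = '/some/path/2012-12-01.nc'
    (variable name and path layout are guesses for illustration).
    """
    stamp = new_date.strftime('%Y-%m-%d')
    with open(nl_path) as f:
        text = f.read()
    # Replace the YYYY-MM-DD stamp inside the quoted met_data_file path.
    text = re.sub(r"(met_data_file\s*=\s*'[^']*?)\d{4}-\d{2}-\d{2}",
                  lambda m: m.group(1) + stamp, text)
    with open(nl_path, 'w') as f:
        f.write(text)
```

Calling something like this from assimilation.csh at the end of each two-day segment would keep the stamp inside the window the model can find.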

DART's usual workflow is: run 6 hours, stop and run the assimilation, run another 6 hours, and so on. Since the model only finds the correct met_data within two days (8 cycles of 6 hours), you should set ./xmlchange DATA_ASSIMILATION_CYCLES=8 and then set RESUBMIT to however many two-day segments you need.

Observation data

If we run from 2012-12-01-00000 for 6 hours to 2012-12-01-21600, DART then starts the assimilation. It searches for observations within the 6-hour window centered on 2012-12-01-21600, i.e. (2012-12-01-10800, 2012-12-01-32400].
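The window arithmetic can be checked with plain datetime math (an illustration only, not DART code):

```python
from datetime import datetime, timedelta

def assim_window(center):
    """Return the (exclusive, inclusive] 6-hour window around a cycle time."""
    return center - timedelta(hours=3), center + timedelta(hours=3)

# Cycle at 2012-12-01-21600, i.e. 06:00 UTC:
lo, hi = assim_window(datetime(2012, 12, 1, 6))
# lo = 2012-12-01 03:00 (second 10800), hi = 2012-12-01 09:00 (second 32400)
```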

The data I experimented with is Aura MLS temperature Level 2, downloadable from GES DISC Search: Showing 1 - 25 of 179 datasets associated with MLS (nasa.gov).

We first need to convert it to nc format; the Python program is as follows:

#%%
import h5py
import numpy as np
from netCDF4 import Dataset
import xarray as xr
import pandas as pd
import glob


# %% xarray
filelist = glob.glob('/data2/share/elzd_2024_000125/xhw/downloadData/MLS/2013/Level2/MLS*')
filelist = sorted(filelist)
for i in range(len(filelist)):
f = h5py.File(filelist[i], mode='r')
f.visit(lambda x: print(x))


T = f['HDFEOS/SWATHS/Temperature/Data Fields/L2gpValue'][:]
datafield = '/HDFEOS/SWATHS/Temperature/Data Fields/'
geofield = '/HDFEOS/SWATHS/Temperature/Geolocation Fields/'
pres_path = '/HDFEOS/SWATHS/Temperature/Geolocation Fields/Pressure'
lat_path = '/HDFEOS/SWATHS/Temperature/Geolocation Fields/Latitude'
lon_path = '/HDFEOS/SWATHS/Temperature/Geolocation Fields/Longitude'
time_path = 'HDFEOS/SWATHS/Temperature/Geolocation Fields/Time'

longitude = f[lon_path][:]
latitude = f[lat_path][:]
timeMLS = f[time_path][:]
level = f[pres_path][:]
Precision = f[datafield+'L2gpPrecision'][:]
Quality = f[datafield+'Quality'][:]
Convergence = f[datafield+'Convergence'][:]
LocalSolarTime = f[geofield+'LocalSolarTime'][:]


time = np.datetime64('1993-01-01T00') + f[time_path][:].astype('timedelta64[s]')


# dfall = xr.DataArray(T, coords={'time':time, 'level':level, 'longitude':('time', longitude), 'latitude':('time', latitude)}, dims=['time', 'level'])
T = xr.DataArray(T, coords={'times': ('times',time), 'levels':('levels',level)}, dims=['times', 'levels'], name='Temperature')
Pressure = xr.DataArray(level, coords={'levels':('levels',level)}, dims=['levels'], name='Pressure')
Longitude = xr.DataArray(longitude, coords={'times':('times',time)}, dims=['times'], name='Longitude')
Latitude = xr.DataArray(latitude, coords={'times':('times',time)}, dims=['times'], name='Latitude')
Precision = xr.DataArray(Precision, coords={'times': ('times',time), 'levels':('levels',level)}, dims=['times', 'levels'], name='Precision')
Quality = xr.DataArray(Quality, coords={'times':('times',time)}, dims=['times'], name='Quality')
Convergence = xr.DataArray(Convergence, coords={'times':('times',time)}, dims=['times'], name='Convergence')
LocalSolarTime = xr.DataArray(LocalSolarTime, coords={'times':('times',time)}, dims=['times'], name='LocalSolarTime')
Time = xr.DataArray(time, coords={'times':('times',time)}, dims=['times'], name='Time')

DAYOFYEAR = Time.times.dt.dayofyear.values
DAYOFYEAR = xr.DataArray(DAYOFYEAR, coords={'times':('times',time)}, dims=['times'], name='DAYOFYEAR')

YEAR = Time.times.dt.year.values
YEAR = xr.DataArray(YEAR, coords={'times':('times',time)}, dims=['times'], name='YEAR')


if i == 0:
dsall = xr.merge([Time, Pressure, Latitude, Longitude, LocalSolarTime, T, Precision, Quality, Convergence, DAYOFYEAR, YEAR])

else:
ds = xr.merge([Time, Pressure, Latitude, Longitude, LocalSolarTime, T, Precision, Quality, Convergence, DAYOFYEAR, YEAR])

dsall = xr.concat([dsall, ds], dim='times')
# %%

timename = pd.date_range('2012-12-01-00','2013-01-25', freq='6H')
dsall.sel(times='2012-12-03')
# i = 20

for i in range(timename.shape[0]-1):

left = timename[i] - np.timedelta64(3,'h')
right = timename[i] + np.timedelta64(3,'h')

ds_temp = dsall.sel(times =slice((left+np.timedelta64(1,'s')).strftime('%Y-%m-%dT%H:%S'),
(right-np.timedelta64(1,'s')).strftime('%Y-%m-%dT%H:%S')
))
ds_temp['times'] = (ds_temp.times.values - np.datetime64('1993-01-01T00')).astype('timedelta64[s]').astype(int)

secondofday = timename[i].hour * 3600
secondofday = "{:0<5d}".format(secondofday)
day = timename[i].strftime('%Y-%m-%d')


fileout_path = '/data2/share/elzd_2024_000125/xhw/downloadData/MLS/2013/Level2/DART_nc/201212_6H_CESM'
ds_temp.to_netcdf(f'{fileout_path}/{day}-{secondofday}.nc')





The program above saves the MLS data into one nc file per 6 hours. Next, use DART's bundled converter to turn the nc files into text files that DART can read. The converter lives in /public/home/elzd_2024_000125/DART/observations/obs_converters/AURA/work; run ./quickbuild.sh there to compile it before use.

Then edit run_setup.sh with the relevant settings:

@ year = 2012
foreach month (12)
foreach day (`seq 12 13`) # process the data for Dec 12-13, 2012
foreach second (00000 21600 43200 64800)



@ dayofyear = 335 + $day # precomputed: Nov 30, 2012 is day 335, so Dec N is day 335+N

set new_dayofyear = `printf "%03d" $dayofyear`
set new_month = `printf "%02d" $month`
set new_day = `printf "%02d" $day`
set new_second = `printf "%05d" $second`
set new_year = `printf "%04d" $year`


sed -e "s/DAYOFYEAR/${new_dayofyear}/" \
-e "s/REPLACE_year/${new_year}/" \
-e "s/REPLACE_month/${new_month}/" \
-e "s/REPLACE_day/${new_day}/" \
-e "s/REPLACE_second/${new_second}/" \
inputDOY244.nml > input.nml

./convert_aura

# @ dayofyear = $dayofyear + 1

end
end
end


Also edit inputDOY244.nml in the same directory:

&convert_aura_nml
aura_netcdf_file = '/data2/share/elzd_2024_000125/xhw/downloadData/MLS/2013/Level2/DART_nc/201212_6H_CESM/REPLACE_year-REPLACE_month-REPLACE_day-REPLACE_second.nc',
aura_netcdf_filelist = '',
aura_outfile = '/data2/share/elzd_2024_000125/xhw/downloadData/MLS/2013/Level2/DART_seq/201212_6H_CESM/obs_seq.REPLACE_year-REPLACE_month-REPLACE_day-REPLACE_second',
aura_yr = REPLACE_year,
aura_doy = DAYOFYEAR ,
/

&obs_kind_nml
assimilate_these_obs_types = 'AURAMLS_TEMPERATURE',



Note that the converter has a bug. Suppose the current timestamp is 2012-12-02-00000: the window should be centered on 00000 of the 2nd, with the three hours on either side assimilated together. But when stamping the observations from the left three hours, it labels the 1st as the 2nd, so within that window the data values are correct but their timestamps are wrong (the 1st is written as the 2nd). This requires modifying DART/observations/obs_converters/AURA/convert_aura.f90. Since we already wrote into the nc files which day of the year each observation belongs to, we use that information to control the timestamps. The convert_aura.f90 script follows.
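The mislabeling is easy to reproduce with plain date arithmetic: observations in the left half of a window centered on 2012-12-02-00000 still belong to day of year 336 (Dec 1), not 337 (Dec 2):

```python
from datetime import datetime, timedelta

center = datetime(2012, 12, 2, 0)        # window center 2012-12-02-00000
left_obs = center - timedelta(hours=2)   # an observation from the left half
doy_center = center.timetuple().tm_yday  # 337 (Dec 2; 2012 is a leap year)
doy_left = left_obs.timetuple().tm_yday  # 336 (Dec 1)
```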

! This code is not protected by the DART copyright agreement.
! DART $Id$

! Nick Pedatella
! Program to convert Aura Temperature data to DART Observation
! sequence files.

program convert_aura

use types_mod, only : r8
use time_manager_mod, only : time_type, set_calendar_type, GREGORIAN, set_time,&
increment_time, get_time, set_date, operator(-), &
print_date, operator(+)
use utilities_mod, only : initialize_utilities, find_namelist_in_file, &
check_namelist_read, nmlfileunit, do_nml_file, &
get_next_filename, error_handler, E_ERR, E_MSG, &
find_textfile_dims, finalize_utilities, &
timestamp,do_nml_term
use netcdf_utilities_mod, only : nc_check
use location_mod, only : VERTISPRESSURE, set_location ! pressure coordinates for SABER
use obs_sequence_mod, only : obs_sequence_type, obs_type, read_obs_seq, &
static_init_obs_sequence, init_obs, destroy_obs, &
write_obs_seq, init_obs_sequence, get_num_obs, &
insert_obs_in_seq, destroy_obs_sequence, &
set_copy_meta_data, set_qc_meta_data, set_qc, &
set_obs_values, set_obs_def, insert_obs_in_seq
use obs_def_mod, only : obs_def_type, set_obs_def_time, set_obs_def_type_of_obs, &
set_obs_def_error_variance, set_obs_def_location, &
set_obs_def_key
use obs_utilities_mod, only : create_3d_obs,add_obs_to_seq
use obs_kind_mod, only : AURAMLS_TEMPERATURE

use netcdf

implicit none

! version controlled file description for error handling, do not edit
character(len=256), parameter :: source = &
"$URL$"
character(len=32 ), parameter :: revision = "$Revision$"
character(len=128), parameter :: revdate = "$Date$"

integer, parameter :: num_copies = 1, & ! number of copies in sequence
num_qc = 1 ! number of QC entries

! variables
character (len=130) :: msgstring, next_infile
character (len=80) :: name

integer :: iyear,iday,imonth,idoy,ihr,imin,isec,nalt,nevent,ialt,ievent, &
nfiles,ncid,varid,filenum,io,iunit,obs_num,num_new_obs,k,osec,oday
logical :: file_exist, first_obs, did_obs, from_list = .false.

real(r8) :: qc,oerr,pres_out

real(r8), allocatable :: lat(:), lon(:), pres(:),temp(:,:),qual(:),conv(:),time(:), DAYOFYEAR(:), YEAR(:)
real(r8), allocatable :: localsolartime(:)

type(obs_def_type) :: obs_def
type(obs_sequence_type) :: obs_seq
type(obs_type) :: obs, prev_obs
type(time_type) :: time_obs, time_anal, prev_time


! namelist parameters
character(len=128) :: aura_netcdf_file = 'aura_input.nc'
character(len=128) :: aura_netcdf_filelist = 'aura_input_list'
character(len=128) :: aura_outfile = 'obs_seq.aura'
integer :: aura_yr=2011,aura_doy = 1

namelist /convert_aura_nml/ aura_netcdf_file, aura_netcdf_filelist, &
aura_outfile,aura_yr,aura_doy

! initialize some values
obs_num = 1
qc = 1.0_r8
first_obs = .true.
call set_calendar_type(GREGORIAN)

! read the necessary parameters from input.nml
call initialize_utilities()

call find_namelist_in_file("input.nml", "convert_aura_nml", iunit)
read(iunit, nml = convert_aura_nml, iostat = io)
call check_namelist_read(iunit, io, "convert_aura_nml")

! Record the namelist values used for the run
if (do_nml_file()) write(nmlfileunit, nml=convert_aura_nml)
if (do_nml_term()) write( * , nml=convert_aura_nml)

! namelist checks for sanity

! cannot have both a single filename and a list; the namelist must
! shut one off.
if (aura_netcdf_file /= '' .and. aura_netcdf_filelist /= '') then
call error_handler(E_ERR, 'convert_aura_cdf', &
'One of aura_netcdf_file or filelist must be NULL', &
source, revision, revdate)
endif
if (aura_netcdf_filelist /= '') from_list = .true.

! Define Number of observations:
if (from_list) then
num_new_obs = (400 * 40000) * nfiles
else
num_new_obs = (400 * 40000 )
endif

! output date
print *, 'OUTPUTING FOR YR=',aura_yr,'DOY=',aura_doy

! either read existing obs_seq or create a new one
call static_init_obs_sequence()
call init_obs(obs, num_copies, num_qc)
call init_obs(prev_obs, num_copies, num_qc)
inquire(file=aura_outfile, exist=file_exist)
if ( file_exist ) then

print *, "found existing obs_seq file, overwriting ", trim(aura_outfile)
print *, "max entries = ", num_new_obs
call init_obs_sequence(obs_seq, num_copies, num_qc, num_new_obs)

do k = 1, num_copies
call set_copy_meta_data(obs_seq, k, 'observation')
end do
do k = 1, num_qc
call set_qc_meta_data(obs_seq, k, 'Data QC')
end do

else

print *, "no existing obs_seq file, creating ", trim(aura_outfile)
print *, "max entries = ", num_new_obs
call init_obs_sequence(obs_seq, num_copies, num_qc, num_new_obs)

do k = 1, num_copies
call set_copy_meta_data(obs_seq, k, 'observation')
end do
do k = 1, num_qc
call set_qc_meta_data(obs_seq, k, 'Data QC')
end do

end if


did_obs = .false.
! main loop that does either a single file or a list of files

filenum = 1

fileloop: do ! until out of files

! get the single name, or the next name from a list
if (from_list) then
next_infile = get_next_filename(aura_netcdf_filelist, filenum)
else
next_infile = aura_netcdf_file
if (filenum > 1) next_infile = ''
endif
if (next_infile == '') exit fileloop

! open the file
call nc_check( nf90_open(next_infile, nf90_nowrite, ncid), 'file open', next_infile)

! get the number of events and altitude:
call nc_check( nf90_inq_dimid(ncid,"levels",varid), 'inq dimid altitude')
call nc_check( nf90_inquire_dimension(ncid,varid,name,nalt), 'inq dim altitude')

call nc_check( nf90_inq_dimid(ncid,"times",varid), 'inq dimid event')
call nc_check( nf90_inquire_dimension(ncid,varid,name,nevent), 'inq dim event')
print *, 'nalt=',nalt
allocate( time(nevent) )
allocate( lat(nevent) )
allocate( lon(nevent) )
allocate( pres(nalt) )
allocate( temp(nalt,nevent) )
allocate( qual(nevent) )
allocate( conv(nevent) )
allocate( localsolartime(nevent) )
allocate( DAYOFYEAR(nevent) )
allocate( YEAR(nevent) )


! read in the latitude array
call nc_check( nf90_inq_varid(ncid, "Latitude", varid) ,'inq varid Lat')
call nc_check( nf90_get_var(ncid, varid, lat) ,'get var Lat')

! read the longitude array
call nc_check( nf90_inq_varid(ncid, "Longitude", varid) ,'inq varid Lon')
call nc_check( nf90_get_var(ncid, varid, lon) ,'get var Lon')

! read the altitude array
call nc_check( nf90_inq_varid(ncid, "Pressure", varid) ,'inq varid pressure')
call nc_check( nf90_get_var(ncid, varid, pres) ,'get_var pressure')

! read the temperature
call nc_check( nf90_inq_varid(ncid, "Temperature", varid) ,'inq varid ktemp')
call nc_check( nf90_get_var(ncid, varid, temp) ,'get_var ktemp')

! quality
call nc_check( nf90_inq_varid(ncid, "Quality", varid) ,'inq varid quality')
call nc_check( nf90_get_var(ncid, varid, qual) ,'get_var quality')
! convergence
call nc_check( nf90_inq_varid(ncid, "Convergence", varid) ,'inq varid conv')
call nc_check( nf90_get_var(ncid, varid, conv) ,'get_var conv')
! LST
call nc_check( nf90_inq_varid(ncid, "LocalSolarTime", varid) ,'inq varid lst')
call nc_check( nf90_get_var(ncid, varid, localsolartime) ,'get_var lst')

! read the date/time
call nc_check( nf90_inq_varid(ncid, "Time", varid) ,'inq varid time')
call nc_check( nf90_get_var(ncid, varid, time) ,'get_var time')


! read the date/time
call nc_check( nf90_inq_varid(ncid, "DAYOFYEAR", varid) ,'inq varid DAYOFYEAR')
call nc_check( nf90_get_var(ncid, varid, DAYOFYEAR) ,'get_var DAYOFYEAR')

! read the date/time
call nc_check( nf90_inq_varid(ncid, "YEAR", varid) ,'inq varid YEAR')
call nc_check( nf90_get_var(ncid, varid, YEAR) ,'get_var YEAR')



! finished reading the data
call nc_check( nf90_close(ncid), 'close file')

! now loop through each event / altitude and add to obs sequence file
do ievent=1,nevent

do ialt=1,nalt

!calculate seconds from LST
isec = int( (localsolartime(ievent)-(lon(ievent)/15.))/24.*86400. )
if (isec.le.0) isec = isec + 86400
if (isec.ge.86400) isec = isec - 86400

! iyear = aura_yr ! yr & day are inputs
! idoy = aura_doy

! edited by xhw
iyear = YEAR(ievent) ! yr & day are inputs
idoy = DAYOFYEAR(ievent)


call convert_day(iyear,idoy,imonth,iday)

if (temp(ialt,ievent).ne.-999. .and. & ! skip missing data
qual(ievent).ge.0.65 .and. & !specified quality check
conv(ievent).le.1.2 .and. & ! specified convergence
pres(ialt).ge.0.001 .and. pres(ialt).le.260 .and. & !good altitude range
isec .lt.86400 ) then


ihr = int( isec / 3600 )
imin = int( (isec-ihr*3600) / 60 )
isec = int( (isec-ihr*3600)-imin*60 )
time_obs = set_date(iyear,imonth,iday,ihr,imin,isec)
call get_time(time_obs,osec,oday)

! need to set the error (for now just take SABER error):
call saber_error(pres(ialt),oerr)

! convert pressure from mbar(hPa) -> Pa
pres_out = pres(ialt)*100.

if (lon(ievent).le.0) lon(ievent) = lon(ievent)+360.

! now create the observation:
call create_3d_obs(lat(ievent),lon(ievent),pres_out, &
VERTISPRESSURE,temp(ialt,ievent), &
AURAMLS_TEMPERATURE,oerr,oday,osec,qc,obs)
call add_obs_to_seq(obs_seq,obs,time_obs,prev_obs,prev_time,first_obs)

if(.not. did_obs) did_obs = .true.
end if
end do ! end alt loop
end do ! end event loop

! clean up and loop if there is another input file
! (every array allocated above must be freed, or the next file's
! allocate will fail on an already-allocated array)
deallocate( lat,lon,pres,qual,conv,time,temp,localsolartime,DAYOFYEAR,YEAR )

filenum = filenum + 1

end do fileloop

! done with main loop. if we added any obs to the sequence, write it out:
if (did_obs) then
!print *, 'ready to write, nobs = ', get_num_obs(obs_seq)
if (get_num_obs(obs_seq) > 0) &
call write_obs_seq(obs_seq, aura_outfile)

! minor stab at cleanup, in the off chance this will someday get turned
! into a subroutine in a module. probably not all that needs to be done,
! but a start.
call destroy_obs(obs)
! if obs == prev_obs then you can't delete the same obs twice.
! but if they differ, then it's a leak. for now, don't delete prev
! since the program is exiting here anyway.
!call destroy_obs(prev_obs)
if (get_num_obs(obs_seq) > 0) call destroy_obs_sequence(obs_seq)
endif

! END OF MAIN ROUTINE

contains

subroutine convert_day(iyear,idoy,imonth,iday)
! NMP - Subroutine to convert from yr,doy to yr,mon,day


integer, intent(in) :: iyear
integer, intent(in) :: idoy
integer, intent(out) :: imonth
integer, intent(out) :: iday

integer :: t

t = 0
if(modulo(iyear, 4) == 0) t = 1

if(modulo(iyear, 400) /= 0 .and. modulo(iyear, 100) == 0) t = 0

iday = idoy
if(idoy > 59+t) iday = iday + 2 - t
imonth = ((iday+91)*100)/3055
iday = (iday+91) - (imonth*3055)/100
imonth = imonth - 2

if(imonth >= 1 .and. imonth <= 12) return
write(unit=*,fmt="(a,i11,a)") &
"$$CALEND: DAY OF THE YEAR INPUT =",idoy," IS OUT OF RANGE."
stop

end subroutine


subroutine saber_error(pres,err)

real(r8), intent(in) :: pres
real(r8), intent(out) :: err

! NMP - simple function to calculate the error based on height
! function loosely based on Remsberg et al (2008JGR)

err = 1.6718_r8 - 0.4281_r8*log(pres) + 0.0977_r8*log(pres)**2

end subroutine


end program

! <next few lines under version control, do not edit>
! $URL$
! $Id$
! $Revision$
! $Date$

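The convert_day subroutine above maps day-of-year to month/day with a compact integer trick. A minimal Python sketch of the same arithmetic, cross-checked against the standard library (illustrative only, not part of the converter):

```python
from datetime import date, timedelta

def convert_day(iyear, idoy):
    """Port of the Fortran convert_day integer arithmetic (doy -> month, day)."""
    t = 1 if iyear % 4 == 0 else 0          # leap-year flag
    if iyear % 400 != 0 and iyear % 100 == 0:
        t = 0                               # century years are leap only if divisible by 400
    iday = idoy
    if idoy > 59 + t:                       # past the end of February
        iday += 2 - t
    imonth = ((iday + 91) * 100) // 3055
    iday = (iday + 91) - (imonth * 3055) // 100
    imonth -= 2
    return imonth, iday

# cross-check every day of a leap and a non-leap year against datetime
for year in (2012, 2013):
    ndays = 366 if year == 2012 else 365
    for doy in range(1, ndays + 1):
        d = date(year, 1, 1) + timedelta(days=doy - 1)
        assert convert_day(year, doy) == (d.month, d.day)
```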
After these edits the converter must be recompiled with quickbuild.sh. Note, however, that running run_setup.sh overwrites the input.nml that the build relies on. A clean copy can be downloaded separately from the official repository (DART/observations/obs_converters/AURA/work/input.nml at main · NCAR/DART (github.com)) and placed in the work directory, after which quickbuild.sh can be run again.
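For reference, the two small pieces of arithmetic in the converter, the local-solar-time to UTC-seconds conversion and the SABER-based error model, can be sanity-checked in isolation. A minimal Python sketch of the same expressions (variable names are mine; illustrative only):

```python
import math

def lst_to_utc_seconds(lst_hours, lon_deg):
    """UTC seconds-of-day from local solar time, using LST = UTC + lon/15 hours."""
    isec = int((lst_hours - lon_deg / 15.0) / 24.0 * 86400.0)
    if isec <= 0:          # wrap into [0, 86400), as in the Fortran loop
        isec += 86400
    if isec >= 86400:
        isec -= 86400
    return isec

def saber_error(pres_hpa):
    """Temperature error (K) vs pressure in hPa, loosely based on Remsberg et al. (2008JGR)."""
    lp = math.log(pres_hpa)
    return 1.6718 - 0.4281 * lp + 0.0977 * lp**2

# 18:00 local solar time at 90 deg E is 12:00 UTC (43200 s);
# at 1 hPa log(p) = 0, so the error reduces to the constant term 1.6718 K
print(lst_to_utc_seconds(18.0, 90.0))
print(saber_error(1.0))
```

Note that saber_error is called with pressure still in hPa; the conversion to Pa (pres_out) happens only afterwards, for create_3d_obs.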