From 2ecef5038e81386a002740f6b0112330117c4f6e Mon Sep 17 00:00:00 2001 From: Jason Edgecombe Date: Thu, 29 Nov 2007 03:30:41 +0000 Subject: [PATCH] man-page-additions-20071128 Add new man pages for vos convertROtoRW, vos copy, and read_tape and add a troubleshooting section to the fileserver man page. --- doc/man-pages/README | 4 +- doc/man-pages/pod1/vos.pod | 3 +- doc/man-pages/pod1/vos_convertROtoRW.pod | 123 +++++++++++++++ doc/man-pages/pod1/vos_copy.pod | 158 +++++++++++++++++++ doc/man-pages/pod8/fileserver.pod | 187 +++++++++++++++++++++-- doc/man-pages/pod8/read_tape.pod | 106 +++++++++++++ 6 files changed, 564 insertions(+), 17 deletions(-) create mode 100644 doc/man-pages/pod1/vos_convertROtoRW.pod create mode 100644 doc/man-pages/pod1/vos_copy.pod create mode 100644 doc/man-pages/pod8/read_tape.pod diff --git a/doc/man-pages/README b/doc/man-pages/README index 86cbae175..e2cc192c4 100644 --- a/doc/man-pages/README +++ b/doc/man-pages/README @@ -205,13 +205,11 @@ Known Problems fs rxstatpeer fs rxstatproc fs setcbaddr - read_tape restorevol rmtsysd vldb_convert vos clone - vos convertROtoRW - vos copy + vos setfields vos shadow vsys diff --git a/doc/man-pages/pod1/vos.pod b/doc/man-pages/pod1/vos.pod index d81af1031..1ea17e991 100644 --- a/doc/man-pages/pod1/vos.pod +++ b/doc/man-pages/pod1/vos.pod @@ -237,7 +237,8 @@ L, L, L, L, -L, +L, +L, L, L, L, diff --git a/doc/man-pages/pod1/vos_convertROtoRW.pod b/doc/man-pages/pod1/vos_convertROtoRW.pod new file mode 100644 index 000000000..b7f422e41 --- /dev/null +++ b/doc/man-pages/pod1/vos_convertROtoRW.pod @@ -0,0 +1,123 @@ +=head1 NAME + +vos_convertROtoRW - Converts a Read-Only volume into a Read/Write volume + +=head1 SYNOPSIS + +=for html +
+ +B<vos convertROtoRW> S<<< [B<-server>] <I<machine name>> >>> + S<<< [B<-partition>] <I<partition name>> >>> + S<<< [B<-id>] <I<volume name or ID>> >>> [B<-force>] + S<<< [B<-cell> <I<cell name>>] >>> [B<-noauth>] [B<-localauth>] + [B<-verbose>] [B<-encrypt>] [B<-help>] + +=for html +
+ +=head1 DESCRIPTION + +B<vos convertROtoRW> converts a Read-Only volume into a Read/Write volume +when the original Read/Write volume is no longer available. Its normal use +is to recover the Read/Write volume from a replica after a failed disk, +failed server, or accidental deletion. + +=head1 CAUTIONS + +This command can only be used with namei AFS file servers. If used on an +inode AFS file server (normally found only on Solaris or AIX systems), it +will fail with an error. + +The command name is case-sensitive. It must be issued with the capital "RO" +and "RW". + +=head1 OPTIONS + +=over 4 + +=item B<-server> <I<machine name>> + +Identifies the file server machine that houses the Read-Only volume which +will be converted. Provide the machine's IP address or its host name +(either fully qualified or using an unambiguous abbreviation). For details, +see L<vos(1)>. + +=item B<-partition> <I<partition name>> + +Identifies the partition on the file server machine that houses the +Read-Only volume which will be converted. Provide the full partition name +(for example, B</vicepa>) or one of the abbreviated forms described in +L<vos(1)>. + +=item B<-id> <I<volume name or ID>> + +Specifies either the complete name or volume ID number of a Read/Write +volume. + +=item B<-force> + +Don't ask for confirmation. + +=item B<-cell> <I<cell name>> + +Names the cell in which to run the command. Do not combine this argument +with the B<-localauth> flag. For more details, see L<vos(1)>. + +=item B<-noauth> + +Assigns the unprivileged identity C<anonymous> to the issuer. Do not +combine this flag with the B<-localauth> flag. For more details, see +L<vos(1)>. + +=item B<-localauth> + +Constructs a server ticket using a key from the local +F</usr/afs/etc/KeyFile> file. The B<vos> command interpreter presents it +to the Volume Server and Volume Location Server during mutual +authentication. Do not combine this flag with the B<-cell> argument or +B<-noauth> flag. For more details, see L<vos(1)>. + +=item B<-verbose> + +Produces on the standard output stream a detailed trace of the command's +execution. 
If this argument is omitted, only warnings and error messages +appear. + +=item B<-encrypt> + +Encrypts the command so that the operation's results are not transmitted +across the network in clear text. + +=item B<-help> + +Prints the online help for this command. All other valid options are +ignored. + +=back + +=head1 EXAMPLES + +The following example converts the read-only volume test3.readonly in +partition vicepb on server1 to a read-write volume: + + % vos convertROtoRW server1 b test3.readonly + +=head1 PRIVILEGE REQUIRED + +The issuer must be listed in the F</usr/afs/etc/UserList> file on the +machine specified with the B<-server> argument and +on each database server machine. If the B<-localauth> flag is included, +the issuer must instead be logged on to a server machine as the local +superuser C<root>. + +=head1 SEE ALSO + +L<vos(1)> + +=head1 COPYRIGHT + +Copyright 2007 Jason Edgecombe + +This documentation is covered by the IBM Public License Version 1.0. This +man page was written by Jason Edgecombe for OpenAFS. diff --git a/doc/man-pages/pod1/vos_copy.pod b/doc/man-pages/pod1/vos_copy.pod new file mode 100644 index 000000000..1cf7ec69d --- /dev/null +++ b/doc/man-pages/pod1/vos_copy.pod @@ -0,0 +1,158 @@ +=head1 NAME + +vos_copy - Make a copy of a volume + +=head1 SYNOPSIS + +=for html +
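+ +B<vos copy> S<<< [B<-id>] <I<volume name or ID of source>> >>> + S<<< [B<-fromserver>] <I<machine name for source>> >>> + S<<< [B<-frompartition>] <I<partition name for source>> >>> + S<<< [B<-toname>] <I<volume name for new copy>> >>> + S<<< [B<-toserver>] <I<machine name for destination>> >>> + S<<< [B<-topartition>] <I<partition name for destination>> >>> + [B<-offline>] [B<-readonly>] [B<-live>] S<<< [B<-cell> <I<cell name>>] >>> + [B<-noauth>] [B<-localauth>] [B<-verbose>] [B<-encrypt>] [B<-help>] + +=for html +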
+ +=head1 DESCRIPTION + +The B<vos copy> command makes a copy of a volume with a new name. It is +equivalent to B<vos dump> followed by B<vos restore>, but doesn't require +the volume be stored locally by the client. + +=head1 OPTIONS + +=over 4 + +=item [B<-id>] <I<volume name or ID of source>> + +Specifies either the complete name or volume ID number of a read/write +volume. + +=item [B<-fromserver>] <I<machine name for source>> + +Identifies the file server machine where the source volume resides. Provide +the machine's IP address or its host name (either fully qualified or using +an unambiguous abbreviation). For details, see L<vos(1)>. + +=item [B<-frompartition>] <I<partition name for source>> + +Names the partition where the source volume resides. Provide the full +partition name (for example, B</vicepa>) or one of the abbreviated forms +described in L<vos(1)>. + +=item [B<-toname>] <I<volume name for new copy>> + +The complete name of the new volume to create. + +=item [B<-toserver>] <I<machine name for destination>> + +Identifies the file server machine to which to copy the volume. Provide +the machine's IP address or its host name (either fully qualified or using +an unambiguous abbreviation). For details, see L<vos(1)>. + +=item [B<-topartition>] <I<partition name for destination>> + +Names the partition to which to copy the volume. Provide the full partition +name (for example, B</vicepa>) or one of the abbreviated forms described in +L<vos(1)>. + +=item B<-offline> + +Leaves the new volume flagged as off-line in the volume database. + +=item B<-readonly> + +Flags the new volume as read-only in the volume database. + +=item B<-live> + +Copies the live volume without cloning. This is normally not necessary and +causes the volume to be kept locked for longer than the normal copy +mechanism. + +=item B<-localauth> + +Constructs a server ticket using a key from the local +F</usr/afs/etc/KeyFile> file. The B<vos> command interpreter presents it to +the Volume Server and Volume Location Server during mutual +authentication. Do not combine this flag with the B<-cell> argument or +B<-noauth> flag. For more details, see L<vos(1)>. + +=item B<-verbose> + +Produces on the standard output stream a detailed trace of the command's +execution. 
If this argument is omitted, only warnings and error messages +appear. + +=item B<-encrypt> + +Encrypts the command so that the operation's results are not transmitted +across the network in clear text. + +=item B<-help> + +Prints the online help for this command. All other valid options are +ignored. + +=back + +=head1 OUTPUT + +This command has no output unless C<-verbose> is specified or there is +an error. + +=head1 EXAMPLES + +The following example makes a verbose copy of the C volume named +C in the cell C. The volume and copy both reside on +C of C. + + % vos copy test server1 a test2 server1 a -cell localcell -verbose + Starting transaction on source volume 536870921 ... done + Allocating new volume id for clone of volume 536870921 ... done + Allocating new volume id for copy of volume 536870921 ... done + Cloning source volume 536870921 ... done + Ending the transaction on the source volume 536870921 ... done + Starting transaction on the cloned volume 536870926 ... done + Setting flags on cloned volume 536870926 ... done + Getting status of cloned volume 536870926 ... done + Creating the destination volume 536870927 ... done + Setting volume flags on destination volume 536870927 ... done + Dumping from clone 536870926 on source to volume 536870927 on destination ... done + Ending transaction on cloned volume 536870926 ... done + Starting transaction on source volume 536870921 ... done + Doing the incremental dump from source to destination for volume 536870921 ... done + Setting volume flags on destination volume 536870927 ... done + Ending transaction on destination volume 536870927 ... done + Ending transaction on source volume 536870921 ... done + Starting transaction on the cloned volume 536870926 ... done + Deleting the cloned volume 536870926 ... done + Ending transaction on cloned volume 536870926 ... 
done + Created the VLDB entry for the volume test2 536870927 + Volume 536870921 copied from server1 /vicepa to test2 on server1 /vicepa + +=head1 PRIVILEGE REQUIRED + +The issuer must be listed in the F</usr/afs/etc/UserList> file on the machines +specified with the B<-fromserver> and B<-toserver> arguments and on each +database server machine. If the B<-localauth> flag is included, the issuer +must instead be logged on to a server machine as the local superuser C<root>. + +=head1 SEE ALSO + +L, +L, +L + +=head1 COPYRIGHT + +Copyright 2007 Jason Edgecombe + +This documentation is covered by the IBM Public License Version 1.0. This +man page was written by Jason Edgecombe for OpenAFS. diff --git a/doc/man-pages/pod8/fileserver.pod b/doc/man-pages/pod8/fileserver.pod index 5b82544a6..ea11f0f99 100644 --- a/doc/man-pages/pod8/fileserver.pod +++ b/doc/man-pages/pod8/fileserver.pod @@ -17,7 +17,7 @@ B S<<< [B<-auditlog> >] >>> S<<< [B<-cb> >] >>> [B<-banner>] [B<-novbc>] S<<< [B<-implicit> >] >>> [B<-readonly>] S<<< [B<-hr> >] >>> - [B<-busyat> n >>>] + S<<< [B<-busyat> n >>>] >>> [B<-nobusy>] S<<< [B<-rxpck> >] >>> [B<-rxdbg>] [B<-rxdbge>] S<<< [B<-rxmaxmtu> >] >>> S<<< [B<-rxbind> >] >>> @@ -48,9 +48,9 @@ file server machine as the local superuser C. The File Server creates the F log file as it initializes, if the file does not already exist. It does not write a -detailed trace by default, but use the B<-d> option to increase the amount -of detail. Use the B command to display the contents of the -log file. +detailed trace by default, but the B<-d> option may be used to +increase the amount of detail. Use the B command to +display the contents of the log file. The command's arguments enable the administrator to control many aspects of the File Server's performance, as detailed in L. By default @@ -68,7 +68,7 @@ machine sizes. The maximum number of lightweight processes (LWPs) the File Server uses to handle requests for data; corresponds to the B<-p> argument. 
The File -Server always uses a minimum of 32 KB for these processes. +Server always uses a minimum of 32 KB of memory for these processes. =item * @@ -168,6 +168,16 @@ the Protection Server every two hours to recompute host CPSs, implying that it can take that long for changed group memberships to become effective. To change this frequency, use the B<-hr> argument. +The File Server stores volumes in partitions. A partition is a +filesystem or directory on the server machine that is named C</vicepX> +or C</vicepXX> where X is "a" through "z" and XX is "aa" through "zz". The +File Server expects that the /vicepXX directories are each on a +dedicated filesystem. The File Server will only use a /vicepXX if it's +a mountpoint for another filesystem, unless the file +C</vicepXX/AlwaysAttach> exists. The data in the partition is in a +special format that can only be accessed using OpenAFS commands or an +OpenAFS client. + The File Server generates the following message when a partition is nearly full: @@ -178,12 +188,12 @@ suites. Provide the command name and all option names in full. =head1 CAUTIONS -Do not use the B<-k> and -w arguments, which are intended for use by the -AFS Development group only. Changing them from their default values can -result in unpredictable File Server behavior. In any case, on many -operating systems the File Server uses native threads rather than the LWP -threads, so using the B<-k> argument to set the number of LWP threads has -no effect. +Do not use the B<-k> and B<-w> arguments, which are intended for use +by the AFS Development group only. Changing them from their default +values can result in unpredictable File Server behavior. In any case, +on many operating systems the File Server uses native threads rather +than the LWP threads, so using the B<-k> argument to set the number of +LWP threads has no effect. Do not specify both the B<-spare> and B<-pctspare> arguments. 
Doing so causes the File Server to exit, leaving an error message in the @@ -398,10 +408,160 @@ line: -cmd "/usr/afs/bin/fileserver -pctspare 10 \ -L" /usr/afs/bin/volserver /usr/afs/bin/salvager + +=head1 TROUBLESHOOTING + +Sending process signals to the File Server Process can change its +behavior in the following ways: + + Process Signal OS Result + --------------------------------------------------------------------- + + File Server XCPU Unix Prints a list of client IP + Addresses. + + File Server USR2 Windows Prints a list of client IP + Addresses. + + File Server POLL HPUX Prints a list of client IP + Addresses. + + Any server TSTP Any Increases Debug level by a power + of 5 -- 1,5,25,125, etc. + This has the same effect as the + -d XXX command-line option. + + Any Server HUP Any Resets Debug level to 0 + + File Server TERM Any Run minor instrumentation over + the list of descriptors. + + Other Servers TERM Any Causes the process to quit. + + File Server QUIT Any Causes the File Server to Quit. + Bos Server knows this. + +The basic metric of whether an AFS file server is doing well is the number +of connections waiting for a thread, +which can be found by running the following command: + + % rxdebug <server> | grep waiting_for | wc -l + +Each line returned by C<rxdebug> that contains the text "waiting_for" +represents a connection that's waiting for a file server thread. + +If the blocked connection count is ever above 0, the server is having +problems replying to clients in a timely fashion. If it gets above 10, +roughly, there will be slowness noticeable to the user. The total number of +connections is a mostly irrelevant number that grows essentially +monotonically for as long as the server has been running and then goes back +down to zero when it's restarted. + +The most common cause of blocked connections rising on a server is some +process somewhere performing an abnormal number of accesses to that server +and its volumes. 
If multiple servers have a blocked connection count, the +most likely explanation is that there is a volume replicated between those +servers that is absorbing an abnormally high access rate. + +To get an access count on all the volumes on a server, run: + + % vos listvol <server> -long + +and save the output in a file. The results will look like a bunch of B<vos +examine> output for each volume on the server. Look for lines like: + + 40065 accesses in the past day (i.e., vnode references) + +and look for volumes with an abnormally high number of accesses. Anything +over 10,000 is fairly high, but some volumes like root.cell and other +volumes close to the root of the cell will have that many hits routinely. +Anything over 100,000 is generally abnormally high. The count resets about +once a day. + +Another approach that can be used to narrow the possibilities for a +replicated volume, when multiple servers are having trouble, is to find all +replicated volumes for that server. Run: + + % vos listvldb -server <server> + +where <server> is one of the servers having problems, to refresh the VLDB +cache, and then run: + + % vos listvldb -server <server> -part <partition> + +to get a list of all volumes on that server and partition, including every +other server with replicas. + +Once the volume causing the problem has been identified, the best way to +deal with the problem is to move that volume to another server with a low +load or to stop any runaway programs that are accessing that volume +unnecessarily. Often the volume name alone will be enough information to tell what's +going on. + +If you still need additional information about who's hitting that server, +sometimes you can guess at that information from the failed callbacks in the +F<FileLog> log in F</usr/afs/logs> on the server, or from the output of: + + % /usr/afsws/etc/rxdebug <server> -rxstats + +but the best way is to turn on debugging output from the file server. +(Warning: This generates a lot of output into FileLog on the AFS server.) 
+To do this, log on to the AFS server, find the PID of the fileserver +process, and do: + + kill -TSTP <pid> + +where <pid> is the PID of the file server process. This will raise the +debugging level so that you'll start seeing what people are actually doing +on the server. You can do this up to three more times to get even more +output if needed. To reset the debugging level back to normal, use the +following command (it will NOT terminate the file server): + + kill -HUP <pid> + +The debugging setting on the File Server should be reset back to normal when +debugging is no longer needed. Otherwise, the AFS server may well fill its +disks with debugging output. + +The lines of the debugging output that are most useful for debugging load +problems are: + + SAFS_FetchStatus, Fid = 2003828163.77154.82248, Host 171.64.15.76 + SRXAFS_FetchData, Fid = 2003828163.77154.82248 + +(The example above is partly truncated to highlight the interesting +information). The Fid identifies the volume and vnode within the volume; +the volume is the first long number. So, for example, this was: + + % vos examine 2003828163 + pubsw.matlab61 2003828163 RW 1040060 K On-line + afssvr5.Stanford.EDU /vicepa + RWrite 2003828163 ROnly 2003828164 Backup 2003828165 + MaxQuota 3000000 K + Creation Mon Aug 6 16:40:55 2001 + Last Update Tue Jul 30 19:00:25 2002 + 86181 accesses in the past day (i.e., vnode references) + + RWrite: 2003828163 ROnly: 2003828164 Backup: 2003828165 + number of sites -> 3 + server afssvr5.Stanford.EDU partition /vicepa RW Site + server afssvr11.Stanford.EDU partition /vicepd RO Site + server afssvr5.Stanford.EDU partition /vicepa RO Site + +and from the Host information one can tell what system is accessing that +volume. + +Note that the output of L<vos_examine(1)> also includes the access count, so +once the problem has been identified, vos examine can be used to see if the +access count is still increasing. 
Also remember that you can run vos +examine on the read-only replica (e.g., pubsw.matlab61.readonly) to see the +access counts on the read-only replica on all of the servers that it's +located on. + =head1 PRIVILEGE REQUIRED The issuer must be logged in as the superuser C on a file server -machine to issue the command at a command shell prompt. It is conventional +machine to issue the command at a command shell prompt. It is conventional instead to create and start the process by issuing the B command. @@ -413,7 +573,8 @@ L, L, L, L, -L +L, +L =head1 COPYRIGHT diff --git a/doc/man-pages/pod8/read_tape.pod b/doc/man-pages/pod8/read_tape.pod new file mode 100644 index 000000000..d3aa433ff --- /dev/null +++ b/doc/man-pages/pod8/read_tape.pod @@ -0,0 +1,106 @@ +=head1 NAME + +read_tape - Reads volume dumps from a backup tape to a file + +=head1 SYNOPSIS + +=for html +
+ +B<read_tape> S<<< B<-tape> <I<tape device>> >>> + S<<< B<-restore> <I<# of volumes to restore>> >>> + S<<< B<-skip> <I<# of volumes to skip>> >>> + S<<< B<-file> <I<filename>> >>> [B<-scan>] [B<-noask>] [B<-label>] + [B<-vheaders>] [B<-verbose>] [B<-help>] + +=for html +
+ +=head1 DESCRIPTION + +B<read_tape> reads an OpenAFS backup tape and prompts for each dump file to +save. This command does not require any OpenAFS infrastructure. This +command does not need an OpenAFS client or server to be available, which is +not the case with the L<backup(8)> command. + +The dump files will be named for the Read/Write name of the volume restored. +After saving each dump file, B<vos restore> or B<restorevol> can be used to +restore the volume into AFS and non-AFS space respectively. + +B<read_tape> reads the tape while skipping the specified number of volumes. +After that, it restores the specified number of volumes. B<read_tape> +does not rewind the tape, so it may be run multiple times in succession. + +=head1 OPTIONS + +=over 4 + +=item B<-tape> <I<tape device>> + +Specifies the tape device from which to restore. + +=item B<-restore> <I<# of volumes to restore>> + +Specifies the number of volumes to restore from tape. + +=item B<-skip> <I<# of volumes to skip>> + +Specifies the number of volumes to skip before starting the restore. + +=item B<-file> <I<filename>> + +Specifies an alternate name for the restored volume dump file rather than +the default of the volume name. + +=item B<-scan> + +Scans the tape. + +=item B<-noask> + +Doesn't prompt for each volume. + +=item B<-label> + +Displays the full dump label. + +=item B<-vheaders> + +Displays the full volume headers. + +=item B<-verbose> + +Produces on the standard output stream a detailed trace of the command's +execution. If this argument is omitted, only warnings and error messages +appear. + +=item B<-help> + +Prints the online help for this command. All other valid options are +ignored. + +=back + +=head1 EXAMPLES + +The following command will read the third through fifth volumes from +the tape device /dev/tape without prompting: + + % read_tape -tape /dev/tape -skip 2 -restore 3 -noask + +=head1 PRIVILEGE REQUIRED + +The issuer must have access to read and write to the specified tape device. 
+ +=head1 SEE ALSO + +L, +L, +L + +=head1 COPYRIGHT + +Copyright 2007 Jason Edgecombe + +This documentation is covered by the IBM Public License Version 1.0. This +man page was written by Jason Edgecombe for OpenAFS. -- 2.39.5