merged from debian-apt
[ntk/apt.git] / apt-pkg / acquire.h
1 // -*- mode: cpp; mode: fold -*-
2 // Description /*{{{*/
3 // $Id: acquire.h,v 1.29.2.1 2003/12/24 23:09:17 mdz Exp $
4 /* ######################################################################
5
6 Acquire - File Acquisition
7
8 This module contains the Acquire system. It is responsible for bringing
9 files into the local pathname space. It deals with URIs for files and
10 URI handlers responsible for downloading or finding the URIs.
11
12 Each file to download is represented by an Acquire::Item class subclassed
13 into a specialization. The Item class can add itself to several URI
14 acquire queues each prioritized by the download scheduler. When the
15 system is run the proper URI handlers are spawned and the acquire
16 queues are fed into the handlers by the scheduler until the queues are
17 empty. This allows for an Item to be downloaded from an alternate source
18 if the first try turns out to fail. It also allows concurrent downloading
19 of multiple items from multiple sources as well as dynamic balancing
20 of load between the sources.
21
22 Scheduling of downloads is done on a first ask first get basis. This
23 preserves the order of the download as much as possible. And means the
24 fastest source will tend to process the largest number of files.
25
26 Internal methods and queues for performing gzip decompression,
27 md5sum hashing and file copying are provided to allow items to apply
28 a number of transformations to the data files they are working with.
29
30 ##################################################################### */
31 /*}}}*/
32
33 /** \defgroup acquire Acquire system {{{
34 *
35 * \brief The Acquire system is responsible for retrieving files from
36 * local or remote URIs and postprocessing them (for instance,
37 * verifying their authenticity). The core class in this system is
38 * pkgAcquire, which is responsible for managing the download queues
39 * during the download. There is at least one download queue for
40 * each supported protocol; protocols such as http may provide one
41 * queue per host.
42 *
43 * Each file to download is represented by a subclass of
44 * pkgAcquire::Item. The files add themselves to the download
45 * queue(s) by providing their URI information to
46 * pkgAcquire::Item::QueueURI, which calls pkgAcquire::Enqueue.
47 *
48 * Once the system is set up, the Run method will spawn subprocesses
49 * to handle the enqueued URIs; the scheduler will then take items
50 * from the queues and feed them into the handlers until the queues
51 * are empty.
52 *
53 * \todo Acquire supports inserting an object into several queues at
54 * once, but it is not clear what its behavior in this case is, and
55 * no subclass of pkgAcquire::Item seems to actually use this
56 * capability.
57 */ /*}}}*/
58
59 /** \addtogroup acquire
60 *
61 * @{
62 *
63 * \file acquire.h
64 */
65
66 #ifndef PKGLIB_ACQUIRE_H
67 #define PKGLIB_ACQUIRE_H
68
69 #include <apt-pkg/macros.h>
70 #include <apt-pkg/weakptr.h>
71
72 #include <vector>
73 #include <string>
74
75 using std::vector;
76 using std::string;
77
78
79 #include <sys/time.h>
80 #include <unistd.h>
81
82 class pkgAcquireStatus;
83
84 /** \brief The core download scheduler. {{{
85 *
86 * This class represents an ongoing download. It manages the lists
87 * of active and pending downloads and handles setting up and tearing
88 * down download-related structures.
89 *
90 * \todo Why all the protected data items and methods?
91 */
92 class pkgAcquire
93 {
94 private:
95 /** \brief FD of the Lock file we acquire in Setup (if any) */
96 int LockFD;
97 /** \brief dpointer placeholder (for later in case we need it) */
98 void *d;
99
100 public:
101
102 class Item;
103 class Queue;
104 class Worker;
105 struct MethodConfig;
106 struct ItemDesc;
107 friend class Item;
108 friend class Queue;
109
110 typedef vector<Item *>::iterator ItemIterator;
111 typedef vector<Item *>::const_iterator ItemCIterator;
112
113 protected:
114
115 /** \brief A list of items to download.
116 *
117 * This is built monotonically as items are created and only
118 * emptied when the download shuts down.
119 */
120 vector<Item *> Items;
121
122 /** \brief The head of the list of active queues.
123 *
124 * \todo why a hand-managed list of queues instead of std::list or
125 * std::set?
126 */
127 Queue *Queues;
128
129 /** \brief The head of the list of active workers.
130 *
131 * \todo why a hand-managed list of workers instead of std::list
132 * or std::set?
133 */
134 Worker *Workers;
135
136 /** \brief The head of the list of acquire method configurations.
137 *
138 * Each protocol (http, ftp, gzip, etc) via which files can be
139 * fetched can have a representation in this list. The
140 * configuration data is filled in by parsing the 100 Capabilities
141 * string output by a method on startup (see
142 * pkgAcqMethod::pkgAcqMethod and pkgAcquire::GetConfig).
143 *
144 * \todo why a hand-managed config dictionary instead of std::map?
145 */
146 MethodConfig *Configs;
147
148 /** \brief The progress indicator for this download. */
149 pkgAcquireStatus *Log;
150
151 /** \brief The number of files which are to be fetched. */
152 unsigned long ToFetch;
153
154 // Configurable parameters for the scheduler
155
156 /** \brief Represents the queuing strategy for remote URIs. */
157 enum QueueStrategy {
158 /** \brief Generate one queue for each protocol/host combination; downloads from
159 * multiple hosts can proceed in parallel.
160 */
161 QueueHost,
162 /** \brief Generate a single queue for each protocol; serialize
163 * downloads from multiple hosts.
164 */
165 QueueAccess} QueueMode;
166
167 /** \brief If \b true, debugging information will be dumped to std::clog. */
168 bool const Debug;
169 /** \brief If \b true, a download is currently in progress. */
170 bool Running;
171
172 /** \brief Add the given item to the list of items. */
173 void Add(Item *Item);
174
175 /** \brief Remove the given item from the list of items. */
176 void Remove(Item *Item);
177
178 /** \brief Add the given worker to the list of workers. */
179 void Add(Worker *Work);
180
181 /** \brief Remove the given worker from the list of workers. */
182 void Remove(Worker *Work);
183
184 /** \brief Insert the given fetch request into the appropriate queue.
185 *
186 * \param Item The URI to download and the item to download it
187 * for. Copied by value into the queue; no reference to Item is
188 * retained.
189 */
190 void Enqueue(ItemDesc &Item);
191
192 /** \brief Remove all fetch requests for this item from all queues. */
193 void Dequeue(Item *Item);
194
195 /** \brief Determine the fetch method and queue of a URI.
196 *
197 * \param URI The URI to fetch.
198 *
199 * \param[out] Config A location in which to place the method via
200 * which the URI is to be fetched.
201 *
202 * \return the string-name of the queue in which a fetch request
203 * for the given URI should be placed.
204 */
205 string QueueName(string URI,MethodConfig const *&Config);
206
207 /** \brief Build up the set of file descriptors upon which select() should
208 * block.
209 *
210 * The default implementation inserts the file descriptors
211 * corresponding to active downloads.
212 *
213 * \param[out] Fd The largest file descriptor in the generated sets.
214 *
215 * \param[out] RSet The set of file descriptors that should be
216 * watched for input.
217 *
218 * \param[out] WSet The set of file descriptors that should be
219 * watched for output.
220 */
221 virtual void SetFds(int &Fd,fd_set *RSet,fd_set *WSet);
222
223 /** \brief Handle input from and output to file descriptors which select()
224 * has determined are ready. The default implementation
225 * dispatches to all active downloads.
226 *
227 * \param RSet The set of file descriptors that are ready for
228 * input.
229 *
230 * \param WSet The set of file descriptors that are ready for
231 * output.
232 */
233 virtual void RunFds(fd_set *RSet,fd_set *WSet);
234
235 /** \brief Check for idle queues with ready-to-fetch items.
236 *
237 * Called by pkgAcquire::Queue::Done each time an item is dequeued
238 * but remains on some queues; i.e., another queue should start
239 * fetching it.
240 */
241 void Bump();
242
243 public:
244
245 /** \brief Retrieve information about a fetch method by name.
246 *
247 * \param Access The name of the method to look up.
248 *
249 * \return the method whose name is Access, or \b NULL if no such method exists.
250 */
251 MethodConfig *GetConfig(string Access);
252
253 /** \brief Provides information on how a download terminated. */
254 enum RunResult {
255 /** \brief All files were fetched successfully. */
256 Continue,
257
258 /** \brief Some files failed to download. */
259 Failed,
260
261 /** \brief The download was cancelled by the user (i.e., #Log's
262 * pkgAcquireStatus::Pulse() method returned \b false).
263 */
264 Cancelled};
265
266 /** \brief Download all the items that have been Add()ed to this
267 * download process.
268 *
269 * This method will block until the download completes, invoking
270 * methods on #Log to report on the progress of the download.
271 *
272 * \param PulseInterval The method pkgAcquireStatus::Pulse will be
273 * invoked on #Log at intervals of PulseInterval milliseconds (NOTE(review): the default of 500000 looks like microseconds -- confirm).
274 *
275 * \return the result of the download.
276 */
277 RunResult Run(int PulseInterval=500000);
278
279 /** \brief Remove all items from this download process, terminate
280 * all download workers, and empty all queues.
281 */
282 void Shutdown();
283
284 /** \brief Get the first #Worker object.
285 *
286 * \return the first active worker in this download process.
287 */
288 inline Worker *WorkersBegin() {return Workers;};
289
290 /** \brief Advance to the next #Worker object.
291 *
292 * \return the worker immediately following I, or \b NULL if none
293 * exists.
294 */
295 Worker *WorkerStep(Worker *I);
296
297 /** \brief Get the head of the list of items. */
298 inline ItemIterator ItemsBegin() {return Items.begin();};
299
300 /** \brief Get the end iterator of the list of items. */
301 inline ItemIterator ItemsEnd() {return Items.end();};
302
303 // Iterate over queued Item URIs
304 class UriIterator;
305 /** \brief Get the head of the list of enqueued item URIs.
306 *
307 * This iterator will step over every element of every active
308 * queue.
309 */
310 UriIterator UriBegin();
311 /** \brief Get the end iterator of the list of enqueued item URIs. */
312 UriIterator UriEnd();
313
314 /** \brief Delete each entry in the given directory that is not being
315 * downloaded by this object. For instance, when downloading new
316 * list files, calling Clean() will delete the old ones.
317 *
318 * \param Dir The directory to be cleaned out.
319 *
320 * \return \b true if the directory exists and is readable.
321 */
322 bool Clean(string Dir);
323
324 /** \return the total size in bytes of all the items included in
325 * this download.
326 */
327 unsigned long long TotalNeeded();
328
329 /** \return the size in bytes of all non-local items included in
330 * this download.
331 */
332 unsigned long long FetchNeeded();
333
334 /** \return the amount of data to be fetched that is already
335 * present on the filesystem.
336 */
337 unsigned long long PartialPresent();
338
339 /** \brief Delayed constructor
340 *
341 * \param Progress indicator associated with this download or
342 * \b NULL for none. This object is not owned by the
343 * download process and will not be deleted when the pkgAcquire
344 * object is destroyed. Naturally, it should live for at least as
345 * long as the pkgAcquire object does.
346 * \param Lock defines a lock file that should be acquired to ensure
347 * only one Acquire class is in action at the time or an empty string
348 * if no lock file should be used.
349 */
350 bool Setup(pkgAcquireStatus *Progress = NULL, string const &Lock = "");
351 /** \brief Replace the progress indicator (#Log) used by this download with Progress. */
352 void SetLog(pkgAcquireStatus *Progress) { Log = Progress; }
353
354 /** \brief Construct a new pkgAcquire; the overload taking a Log argument is marked deprecated. */
355 pkgAcquire(pkgAcquireStatus *Log) __deprecated;
356 pkgAcquire();
357
358 /** \brief Destroy this pkgAcquire object.
359 *
360 * Destroys all queue, method, and item objects associated with
361 * this download.
362 */
363 virtual ~pkgAcquire();
364
365 };
366
367 /** \brief Represents a single download source from which an item
368 * should be downloaded.
369 *
370 * An item may have several associated ItemDescs over its lifetime.
371 */
372 struct pkgAcquire::ItemDesc : public WeakPointable
373 {
374 /** \brief The URI from which to download this item. */
375 string URI;
376 /** \brief A description of this item. */
377 string Description;
378 /** \brief A shorter description of this item. */
379 string ShortDesc;
380 /** \brief The underlying item which is to be downloaded. */
381 Item *Owner;
382 };
383 /*}}}*/
384 /** \brief A single download queue in a pkgAcquire object. {{{
385 *
386 * \todo Why so many protected values?
387 */
388 class pkgAcquire::Queue
389 {
390 friend class pkgAcquire;
391 friend class pkgAcquire::UriIterator;
392 friend class pkgAcquire::Worker;
393
394 /** \brief dpointer placeholder (for later in case we need it) */
395 void *d;
396
397 /** \brief The next queue in the pkgAcquire object's list of queues. */
398 Queue *Next;
399
400 protected:
401
402 /** \brief A single item placed in this queue. */
403 struct QItem : pkgAcquire::ItemDesc
404 {
405 /** \brief The next item in the queue. */
406 QItem *Next;
407 /** \brief The worker associated with this item, if any. */
408 pkgAcquire::Worker *Worker;
409
410 /** \brief Assign the ItemDesc portion of this QItem from
411 * another ItemDesc; the Next and Worker members are left untouched.
412 */
413 void operator =(pkgAcquire::ItemDesc const &I)
414 {
415 URI = I.URI;
416 Description = I.Description;
417 ShortDesc = I.ShortDesc;
418 Owner = I.Owner;
419 };
420 };
421
422 /** \brief The name of this queue. */
423 string Name;
424
425 /** \brief The head of the list of items contained in this queue.
426 *
427 * \todo why a by-hand list instead of an STL structure?
428 */
429 QItem *Items;
430
431 /** \brief The head of the list of workers associated with this queue.
432 *
433 * \todo This is plural because support exists in Queue for
434 * multiple workers. However, it does not appear that there is
435 * any way to actually associate more than one worker with a
436 * queue.
437 *
438 * \todo Why not just use a std::set?
439 */
440 pkgAcquire::Worker *Workers;
441
442 /** \brief the download scheduler with which this queue is associated. */
443 pkgAcquire *Owner;
444
445 /** \brief The number of entries in this queue that are currently
446 * being downloaded.
447 */
448 signed long PipeDepth;
449
450 /** \brief The maximum number of entries that this queue will
451 * attempt to download at once.
452 */
453 unsigned long MaxPipeDepth;
454
455 public:
456
457 /** \brief Insert the given fetch request into this queue.
458 *
459 * \return \b true if the queuing was successful. May return
460 * \b false if the Item is already in the queue.
461 */
462 bool Enqueue(ItemDesc &Item);
463
464 /** \brief Remove all fetch requests for the given item from this queue.
465 *
466 * \return \b true if at least one request was removed from the queue.
467 */
468 bool Dequeue(Item *Owner);
469
470 /** \brief Locate an item in this queue.
471 *
472 * \param URI A URI to match against.
473 * \param Owner A pkgAcquire::Worker to match against.
474 *
475 * \return the first item in the queue whose URI is \a URI and that
476 * is being downloaded by \a Owner.
477 */
478 QItem *FindItem(string URI,pkgAcquire::Worker *Owner);
479
480 /** \brief Presumably this should start downloading an item?
481 *
482 * \todo Unimplemented. Implement it or remove?
483 */
484 bool ItemStart(QItem *Itm,unsigned long long Size);
485
486 /** \brief Remove the given item from this queue and set its state
487 * to pkgAcquire::Item::StatDone.
488 *
489 * If this is the only queue containing the item, the item is also
490 * removed from the main queue by calling pkgAcquire::Dequeue.
491 *
492 * \param Itm The item to remove.
493 *
494 * \return \b true if no errors are encountered.
495 */
496 bool ItemDone(QItem *Itm);
497
498 /** \brief Start the worker process associated with this queue.
499 *
500 * If a worker process is already associated with this queue,
501 * this is equivalent to calling Cycle().
502 *
503 * \return \b true if the startup was successful.
504 */
505 bool Startup();
506
507 /** \brief Shut down the worker process associated with this queue.
508 *
509 * \param Final If \b true, then the process is stopped unconditionally.
510 * Otherwise, it is only stopped if it does not need cleanup
511 * as indicated by the pkgAcqMethod::NeedsCleanup member of
512 * its configuration.
513 *
514 * \return \b true.
515 */
516 bool Shutdown(bool Final);
517
518 /** \brief Send idle items to the worker process.
519 *
520 * Fills up the pipeline by inserting idle items into the worker's queue.
521 */
522 bool Cycle();
523
524 /** \brief Check for items that could be enqueued.
525 *
526 * Call this after an item placed in multiple queues has gone from
527 * the pkgAcquire::Item::StatFetching state to the
528 * pkgAcquire::Item::StatIdle state, to possibly refill an empty queue.
529 * This is an alias for Cycle().
530 *
531 * \todo Why both this and Cycle()? Are they expected to be
532 * different someday?
533 */
534 void Bump();
535
536 /** \brief Create a new Queue.
537 *
538 * \param Name The name of the new queue.
539 * \param Owner The download process that owns the new queue.
540 */
541 Queue(string Name,pkgAcquire *Owner);
542
543 /** \brief Shut down all the worker processes associated with this queue
544 * and empty the queue.
545 */
546 virtual ~Queue();
547 };
548 /*}}}*/
549 /** \brief Iterates over all the URIs being fetched by a pkgAcquire object. {{{*/
550 class pkgAcquire::UriIterator
551 {
552 /** \brief dpointer placeholder (for later in case we need it) */
553 void *d;
554
555 /** \brief The next queue to iterate over (not the queue CurItem was taken from). */
556 pkgAcquire::Queue *CurQ;
557 /** \brief The item that we currently point at; 0 once every queue has been exhausted. */
558 pkgAcquire::Queue::QItem *CurItem;
559
560 public:
561 /** \brief Pre-increment; simply forwards to the postfix form below. */
562 inline void operator ++() {operator ++(0);};
563 /** \brief Step to the next item, moving on to the following queues (skipping empty ones) when the current one runs out. Dereferences CurItem, so it must not be applied to an end iterator. */
564 void operator ++(int)
565 {
566 CurItem = CurItem->Next;
567 while (CurItem == 0 && CurQ != 0)
568 {
569 CurItem = CurQ->Items;
570 CurQ = CurQ->Next;
571 }
572 };
573 /** \brief Access the current item as an ItemDesc; the comparisons below consider both the pending queue and the current item. */
574 inline pkgAcquire::ItemDesc const *operator ->() const {return CurItem;};
575 inline bool operator !=(UriIterator const &rhs) const {return rhs.CurQ != CurQ || rhs.CurItem != CurItem;};
576 inline bool operator ==(UriIterator const &rhs) const {return rhs.CurQ == CurQ && rhs.CurItem == CurItem;};
577
578 /** \brief Create a new UriIterator.
579 *
580 * \param Q The first queue to iterate over; iteration continues through the queues linked after it, and empty leading queues are skipped. Passing 0 yields an iterator with no current item. The d placeholder is set to 0 so that copies of the iterator never read an indeterminate pointer.
581 */
582 UriIterator(pkgAcquire::Queue *Q) : d(0), CurQ(Q), CurItem(0)
583 {
584 while (CurItem == 0 && CurQ != 0)
585 {
586 CurItem = CurQ->Items;
587 CurQ = CurQ->Next;
588 }
589 }
590 virtual ~UriIterator() {};
591 };
592 /*}}}*/
593 /** \brief Information about the properties of a single acquire method. {{{*/
594 struct pkgAcquire::MethodConfig
595 {
596 /** \brief dpointer placeholder (for later in case we need it) */
597 void *d;
598
599 /** \brief The next link on the acquire method list.
600 *
601 * \todo Why not an STL container?
602 */
603 MethodConfig *Next;
604
605 /** \brief The name of this acquire method (e.g., http). */
606 string Access;
607
608 /** \brief The implementation version of this acquire method. */
609 string Version;
610
611 /** \brief If \b true, only one download queue should be created for this
612 * method.
613 */
614 bool SingleInstance;
615
616 /** \brief If \b true, this method supports pipelined downloading. */
617 bool Pipeline;
618
619 /** \brief If \b true, the worker process should send the entire
620 * APT configuration tree to the fetch subprocess when it starts
621 * up.
622 */
623 bool SendConfig;
624
625 /** \brief If \b true, this fetch method does not require network access;
626 * all files are to be acquired from the local disk.
627 */
628 bool LocalOnly;
629
630 /** \brief If \b true, the subprocess has to carry out some cleanup
631 * actions before shutting down.
632 *
633 * For instance, the cdrom method needs to unmount the CD after it
634 * finishes.
635 */
636 bool NeedsCleanup;
637
638 /** \brief If \b true, this fetch method acquires files from removable media. */
639 bool Removable;
640
641 /** \brief Set up the default method parameters.
642 *
643 * All fields are initialized to NULL, "", or \b false as
644 * appropriate.
645 */
646 MethodConfig();
647
648 /** \brief Destructor, empty currently */
649 virtual ~MethodConfig() {};
650 };
651 /*}}}*/
652 /** \brief A monitor object for downloads controlled by the pkgAcquire class. {{{
653 *
654 * \todo Why protected members?
655 */
656 class pkgAcquireStatus
657 {
658 /** \brief dpointer placeholder (for later in case we need it) */
659 void *d;
660
661 protected:
662
663 /** \brief The last time at which this monitor object was updated. */
664 struct timeval Time;
665
666 /** \brief The time at which the download started. */
667 struct timeval StartTime;
668
669 /** \brief The number of bytes fetched as of the previous call to
670 * pkgAcquireStatus::Pulse, including local items.
671 */
672 unsigned long long LastBytes;
673
674 /** \brief The current rate of download as of the most recent call
675 * to pkgAcquireStatus::Pulse, in bytes per second.
676 */
677 unsigned long long CurrentCPS;
678
679 /** \brief The number of bytes fetched as of the most recent call
680 * to pkgAcquireStatus::Pulse, including local items.
681 */
682 unsigned long long CurrentBytes;
683
684 /** \brief The total number of bytes that need to be fetched.
685 *
686 * \warning This member is inaccurate, as new items might be
687 * enqueued while the download is in progress!
688 */
689 unsigned long long TotalBytes;
690
691 /** \brief The total number of bytes accounted for by items that
692 * were successfully fetched.
693 */
694 unsigned long long FetchedBytes;
695
696 /** \brief The amount of time that has elapsed since the download
697 * started (NOTE(review): the unit is not stated here -- presumably seconds; confirm).
698 */
699 unsigned long long ElapsedTime;
700
701 /** \brief The total number of items that need to be fetched.
702 *
703 * \warning This member is inaccurate, as new items might be
704 * enqueued while the download is in progress!
705 */
706 unsigned long TotalItems;
707
708 /** \brief The number of items that have been successfully downloaded. */
709 unsigned long CurrentItems;
710
711 public:
712
713 /** \brief If \b true, the download scheduler should call Pulse()
714 * at the next available opportunity.
715 */
716 bool Update;
717
718 /** \brief If \b true, extra Pulse() invocations will be performed.
719 *
720 * With this option set, Pulse() will be called every time that a
721 * download item starts downloading, finishes downloading, or
722 * terminates with an error.
723 */
724 bool MorePulses;
725
726 /** \brief Invoked when a local or remote file has been completely fetched.
727 *
728 * \param Size The size of the file fetched.
729 *
730 * \param ResumePoint How much of the file was already fetched.
731 */
732 virtual void Fetched(unsigned long long Size,unsigned long long ResumePoint);
733
734 /** \brief Invoked when the user should be prompted to change the
735 * inserted removable media.
736 *
737 * This method should not return until the user has confirmed to
738 * the user interface that the media change is complete.
739 *
740 * \param Media The name of the media type that should be changed.
741 *
742 * \param Drive The identifying name of the drive whose media
743 * should be changed.
744 *
745 * \return \b true if the user confirms the media change, \b
746 * false if it is cancelled.
747 *
748 * \todo This is a horrible blocking monster; it should be CPSed
749 * with prejudice.
750 */
751 virtual bool MediaChange(string Media,string Drive) = 0;
752
753 /** \brief Invoked when an item is confirmed to be up-to-date.
754 *
755 * For instance, when an HTTP download is informed that the file on
756 * the server was not modified.
757 */
758 virtual void IMSHit(pkgAcquire::ItemDesc &/*Itm*/) {};
759
760 /** \brief Invoked when some of an item's data is fetched. */
761 virtual void Fetch(pkgAcquire::ItemDesc &/*Itm*/) {};
762
763 /** \brief Invoked when an item is successfully and completely fetched. */
764 virtual void Done(pkgAcquire::ItemDesc &/*Itm*/) {};
765
766 /** \brief Invoked when the process of fetching an item encounters
767 * a fatal error.
768 */
769 virtual void Fail(pkgAcquire::ItemDesc &/*Itm*/) {};
770
771 /** \brief Periodically invoked while the Acquire process is underway.
772 *
773 * Subclasses should first call pkgAcquireStatus::Pulse(), then
774 * update their status output. The download process is blocked
775 * while Pulse() is being called.
776 *
777 * \return \b false if the user asked to cancel the whole Acquire process.
778 *
779 * \see pkgAcquire::Run
780 */
781 virtual bool Pulse(pkgAcquire *Owner);
782
783 /** \brief Invoked when the Acquire process starts running. */
784 virtual void Start();
785
786 /** \brief Invoked when the Acquire process stops running. */
787 virtual void Stop();
788
789 /** \brief Initialize all counters to 0 and the time to the current time. */
790 pkgAcquireStatus();
791 virtual ~pkgAcquireStatus() {};
792 };
793 /*}}}*/
794 /** @} */
795
796 #endif